diff --git a/benchmarks/linear_programming/cuopt/parse_scaling_logs.py b/benchmarks/linear_programming/cuopt/parse_scaling_logs.py new file mode 100644 index 0000000000..7e46c0c3e4 --- /dev/null +++ b/benchmarks/linear_programming/cuopt/parse_scaling_logs.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Parse MIP scaling benchmark logs into CSV. + +Reads log output (from stdin or file arguments) produced by the cuOpt MIP +solver with the MIP_SCALING_METRICS / MIP_SCALING_SUMMARY / MIP_OBJ_SCALING / +MIP_GAP_METRICS log lines and the standard solver output. + +Usage: + python parse_scaling_logs.py < benchmark_output.log > results.csv + python parse_scaling_logs.py log1.log log2.log > results.csv +""" + +import csv +import re +import sys +from collections import defaultdict + + +def _float_or_na(s): + try: + return float(s) + except (ValueError, TypeError): + return "" + + +def _int_or_na(s): + try: + return int(s) + except (ValueError, TypeError): + return "" + + +INSTANCE_RE = re.compile( + r"(?:Reading|Solving|instance[=:]\s*)(\S+?)(?:\.mps)?(?:\s|$)", re.I +) +FEASIBLE_RE = re.compile( + r"Found new best solution.*objective[=:\s]+([\d.eE+\-]+)", re.I +) +OPTIMAL_RE = re.compile(r"Optimal solution found", re.I) +OBJ_RE = re.compile(r"Objective\s+([\d.eE+\-]+)", re.I) +REL_GAP_RE = re.compile(r"relative_mip_gap\s+([\d.eE+\-]+)", re.I) +SIMPLEX_RE = re.compile(r"simplex_iterations\s+([\d,]+)", re.I) +NODES_RE = re.compile(r"Explored\s+(\d+)\s+nodes\s+in\s+([\d.]+)s", re.I) +INFEASIBLE_RE = re.compile(r"Integer infeasible|Infeasible", re.I) + +SCALING_METRICS_RE = re.compile( + r"MIP_SCALING_METRICS\s+iteration=(\d+)\s+log2_spread=([\d.eE+\-]+)\s+" + r"target_norm=([\d.eE+\-]+)\s+scaled_rows=(\d+)\s+valid_rows=(\d+)" +) +SCALING_SUMMARY_RE = re.compile( + 
r"MIP_SCALING_SUMMARY\s+rows=(\d+)\s+bigm_rows=(\d+)\s+final_spread=([\d.eE+\-inf]+)" +) +OBJ_SCALING_RE = re.compile( + r"MIP_OBJ_SCALING\s+(applied|skipped).*?(?:scale=([\d.eE+\-]+))?" + r".*?(?:new_scaling_factor=([\d.eE+\-]+))?" +) +GAP_METRICS_RE = re.compile( + r"MIP_GAP_METRICS\s+abs_gap_user=([\d.eE+\-]+)\s+rel_gap=([\d.eE+\-]+)\s+" + r"obj_user=([\d.eE+\-]+)\s+bound_user=([\d.eE+\-]+)\s+obj_scale=([\d.eE+\-]+)" +) +SCALING_SKIPPED_RE = re.compile(r"MIP row scaling skipped", re.I) +SOL_FOUND_RE = re.compile(r"sol_found=(\d+).*?obj_val=([\d.eE+\-inf]+)", re.I) +RUN_MPS_RE = re.compile(r"run_mps\s+(\S+)", re.I) + + +def parse_logs(lines): + records = [] + cur = defaultdict(lambda: "") + instance_name = "" + + def flush(): + nonlocal instance_name + if instance_name: + cur["instance"] = instance_name + records.append(dict(cur)) + cur.clear() + instance_name = "" + + for line in lines: + line = line.rstrip("\n") + + m = RUN_MPS_RE.search(line) + if m: + flush() + instance_name = m.group(1).replace(".mps", "") + continue + + m = INSTANCE_RE.search(line) + if m and not instance_name: + instance_name = m.group(1) + + m = SCALING_SKIPPED_RE.search(line) + if m: + cur["scaling_applied"] = "no" + + m = SCALING_METRICS_RE.search(line) + if m: + cur["scaling_applied"] = "yes" + cur["scaling_last_iteration"] = m.group(1) + cur["scaling_last_spread"] = m.group(2) + cur["scaling_target_norm"] = m.group(3) + + m = SCALING_SUMMARY_RE.search(line) + if m: + cur["rows"] = m.group(1) + cur["bigm_rows"] = m.group(2) + cur["final_spread"] = m.group(3) + + m = OBJ_SCALING_RE.search(line) + if m: + cur["obj_scaling_status"] = m.group(1) + if m.group(2): + cur["obj_scaling_factor"] = m.group(2) + if m.group(3): + cur["obj_new_scaling_factor"] = m.group(3) + + m = FEASIBLE_RE.search(line) + if m: + cur["feasible"] = 1 + cur["objective"] = m.group(1) + + m = OPTIMAL_RE.search(line) + if m: + cur["optimal"] = 1 + + m = NODES_RE.search(line) + if m: + cur["nodes_explored"] = m.group(1) 
+ cur["solve_time_s"] = m.group(2) + + m = SIMPLEX_RE.search(line) + if m: + cur["simplex_iters"] = m.group(1).replace(",", "") + + m = GAP_METRICS_RE.search(line) + if m: + cur["abs_gap_user"] = m.group(1) + cur["rel_gap"] = m.group(2) + cur["obj_user"] = m.group(3) + cur["bound_user"] = m.group(4) + cur["obj_scale"] = m.group(5) + + m = SOL_FOUND_RE.search(line) + if m: + cur["feasible"] = int(m.group(1)) + cur["objective"] = m.group(2) + + m = INFEASIBLE_RE.search(line) + if m: + cur["feasible"] = 0 + + flush() + return records + + +COLUMNS = [ + "instance", + "feasible", + "optimal", + "objective", + "rel_gap", + "abs_gap_user", + "obj_user", + "bound_user", + "solve_time_s", + "simplex_iters", + "nodes_explored", + "scaling_applied", + "bigm_rows", + "rows", + "final_spread", + "scaling_last_iteration", + "scaling_target_norm", + "obj_scaling_status", + "obj_scaling_factor", + "obj_new_scaling_factor", + "obj_scale", +] + + +def main(): + if len(sys.argv) > 1: + lines = [] + for path in sys.argv[1:]: + with open(path) as f: + lines.extend(f.readlines()) + else: + lines = sys.stdin.readlines() + + records = parse_logs(lines) + + writer = csv.DictWriter( + sys.stdout, fieldnames=COLUMNS, extrasaction="ignore" + ) + writer.writeheader() + for rec in records: + writer.writerow(rec) + + n_feasible = sum(1 for r in records if r.get("feasible") == 1) + n_optimal = sum(1 for r in records if r.get("optimal") == 1) + n_total = len(records) + print( + f"# Summary: {n_total} instances, {n_feasible} feasible, {n_optimal} optimal", + file=sys.stderr, + ) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index 9b79dff8af..e2a244844b 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -209,6 +209,7 @@ int run_single_file(std::string file_path, settings.tolerances.absolute_tolerance = 1e-6; settings.presolver = 
cuopt::linear_programming::presolver_t::Default; settings.reliability_branching = reliability_branching; + settings.mip_scaling = true; settings.seed = 42; cuopt::linear_programming::benchmark_info_t benchmark_info; settings.benchmark_info_ptr = &benchmark_info; diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 6d32cd5ed9..60534a36fa 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -107,7 +107,7 @@ class mip_solver_settings_t { /** Initial primal solutions */ std::vector>> initial_solutions; - bool mip_scaling = false; + bool mip_scaling = true; presolver_t presolver{presolver_t::Default}; /** * @brief Determinism mode for MIP solver. diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 6ce9a4f4d0..790e6b399d 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -695,9 +695,9 @@ void branch_and_bound_t::set_final_solution(mip_solution_t& settings_.heuristic_preemption_callback(); } - f_t gap = upper_bound_ - lower_bound; f_t obj = compute_user_objective(original_lp_, upper_bound_.load()); f_t user_bound = compute_user_objective(original_lp_, lower_bound); + f_t gap = std::abs(obj - user_bound); f_t gap_rel = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); bool is_maximization = original_lp_.obj_scale < 0.0; @@ -709,6 +709,13 @@ void branch_and_bound_t::set_final_solution(mip_solution_t& obj, is_maximization ? 
"Upper" : "Lower", user_bound); + settings_.log.printf( + "MIP_GAP_METRICS abs_gap_user=%e rel_gap=%e obj_user=%.16e bound_user=%.16e obj_scale=%e\n", + gap, + gap_rel, + obj, + user_bound, + original_lp_.obj_scale); if (gap <= settings_.absolute_mip_gap_tol || gap_rel <= settings_.relative_mip_gap_tol) { solver_status_ = mip_status_t::OPTIMAL; @@ -1155,7 +1162,10 @@ std::pair branch_and_bound_t::upd Policy& policy) { constexpr f_t inf = std::numeric_limits::infinity(); - const f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; + const f_t obj_scale_mag = std::abs(original_lp_.obj_scale); + const f_t abs_fathom_tol = obj_scale_mag > f_t(0) + ? settings_.absolute_mip_gap_tol / (obj_scale_mag * f_t(10)) + : settings_.absolute_mip_gap_tol / f_t(10); lp_problem_t& leaf_problem = worker->leaf_problem; lp_solution_t& leaf_solution = worker->leaf_solution; const f_t upper_bound = policy.upper_bound(); @@ -1606,7 +1616,8 @@ void branch_and_bound_t::run_scheduler() #endif f_t lower_bound = get_lower_bound(); - f_t abs_gap = upper_bound_ - lower_bound; + f_t abs_gap = std::abs(compute_user_objective(original_lp_, upper_bound_.load()) - + compute_user_objective(original_lp_, lower_bound)); f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); i_t last_node_depth = 0; i_t last_int_infeas = 0; @@ -1616,7 +1627,8 @@ void branch_and_bound_t::run_scheduler() (active_workers_per_strategy_[0] > 0 || node_queue_.best_first_queue_size() > 0)) { bool launched_any_task = false; lower_bound = get_lower_bound(); - abs_gap = upper_bound_ - lower_bound; + abs_gap = std::abs(compute_user_objective(original_lp_, upper_bound_.load()) - + compute_user_objective(original_lp_, lower_bound)); rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); repair_heuristic_solutions(); @@ -1731,14 +1743,16 @@ void branch_and_bound_t::single_threaded_solve() branch_and_bound_worker_t worker(0, original_lp_, Arow_, var_types_, settings_); f_t lower_bound 
= get_lower_bound(); - f_t abs_gap = upper_bound_ - lower_bound; + f_t abs_gap = std::abs(compute_user_objective(original_lp_, upper_bound_.load()) - + compute_user_objective(original_lp_, lower_bound)); f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && rel_gap > settings_.relative_mip_gap_tol && node_queue_.best_first_queue_size() > 0) { bool launched_any_task = false; lower_bound = get_lower_bound(); - abs_gap = upper_bound_ - lower_bound; + abs_gap = std::abs(compute_user_objective(original_lp_, upper_bound_.load()) - + compute_user_objective(original_lp_, lower_bound)); rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); repair_heuristic_solutions(); @@ -2317,7 +2331,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut report(' ', obj, root_objective_, 0, num_fractional); f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), root_objective_); - f_t abs_gap = upper_bound_.load() - root_objective_; + f_t abs_gap = std::abs(compute_user_objective(original_lp_, upper_bound_.load()) - + compute_user_objective(original_lp_, root_objective_)); if (rel_gap < settings_.relative_mip_gap_tol || abs_gap < settings_.absolute_mip_gap_tol) { set_solution_at_root(solution, cut_info); set_final_solution(solution, root_objective_); diff --git a/cpp/src/dual_simplex/user_problem.hpp b/cpp/src/dual_simplex/user_problem.hpp index f50a6d33a5..73c4c391be 100644 --- a/cpp/src/dual_simplex/user_problem.hpp +++ b/cpp/src/dual_simplex/user_problem.hpp @@ -46,7 +46,7 @@ struct user_problem_t { std::vector row_names; std::vector col_names; f_t obj_constant; - f_t obj_scale; // 1.0 for min, -1.0 for max + f_t obj_scale; // positive for min, negative for max bool objective_is_integral{false}; std::vector var_types; std::vector Q_offsets; diff --git a/cpp/src/mip_heuristics/CMakeLists.txt 
b/cpp/src/mip_heuristics/CMakeLists.txt index a200d4265b..5193c0dd91 100644 --- a/cpp/src/mip_heuristics/CMakeLists.txt +++ b/cpp/src/mip_heuristics/CMakeLists.txt @@ -18,6 +18,7 @@ set(MIP_LP_NECESSARY_FILES # Files that are MIP-specific and not needed for pure LP set(MIP_NON_LP_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/mip_scaling_strategy.cu ${CMAKE_CURRENT_SOURCE_DIR}/solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/solver.cu ${CMAKE_CURRENT_SOURCE_DIR}/diversity/assignment_hash_map.cu diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index ed165fe610..88b413aefa 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -180,6 +180,7 @@ bool diversity_manager_t::run_presolve(f_t time_limit, timer_t global_ raft::common::nvtx::range fun_scope("run_presolve"); CUOPT_LOG_INFO("Running presolve!"); timer_t presolve_timer(time_limit); + auto term_crit = ls.constraint_prop.bounds_update.solve(*problem_ptr); if (ls.constraint_prop.bounds_update.infeas_constraints_count > 0) { stats.presolve_time = timer.elapsed_time(); @@ -412,23 +413,26 @@ solution_t diversity_manager_t::run_solver() } else if (!fj_only_run) { convert_greater_to_less(*problem_ptr); - f_t tolerance_divisor = - problem_ptr->tolerances.absolute_tolerance / problem_ptr->tolerances.relative_tolerance; - if (tolerance_divisor == 0) { tolerance_divisor = 1; } f_t absolute_tolerance = context.settings.tolerances.absolute_tolerance; + // f_t tolerance_divisor = 100.; pdlp_solver_settings_t pdlp_settings{}; - pdlp_settings.tolerances.relative_primal_tolerance = absolute_tolerance / tolerance_divisor; - pdlp_settings.tolerances.relative_dual_tolerance = absolute_tolerance / tolerance_divisor; - pdlp_settings.time_limit = lp_time_limit; - pdlp_settings.first_primal_feasible = false; - pdlp_settings.concurrent_halt = &global_concurrent_halt; - pdlp_settings.method = method_t::Concurrent; 
- pdlp_settings.inside_mip = true; - pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; - pdlp_settings.num_gpus = context.settings.num_gpus; - pdlp_settings.presolver = presolver_t::None; - + pdlp_settings.tolerances.absolute_dual_tolerance = absolute_tolerance; + pdlp_settings.tolerances.relative_dual_tolerance = + context.settings.tolerances.relative_tolerance; + pdlp_settings.tolerances.absolute_primal_tolerance = absolute_tolerance; + pdlp_settings.tolerances.relative_primal_tolerance = + context.settings.tolerances.relative_tolerance; + pdlp_settings.time_limit = lp_time_limit; + pdlp_settings.first_primal_feasible = false; + pdlp_settings.concurrent_halt = &global_concurrent_halt; + pdlp_settings.method = method_t::Concurrent; + pdlp_settings.inside_mip = true; + pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; + pdlp_settings.num_gpus = context.settings.num_gpus; + pdlp_settings.presolver = presolver_t::None; + pdlp_settings.per_constraint_residual = true; + set_pdlp_solver_mode(pdlp_settings); timer_t lp_timer(lp_time_limit); auto lp_result = solve_lp_with_method(*problem_ptr, pdlp_settings, lp_timer); diff --git a/cpp/src/mip_heuristics/diversity/lns/rins.cu b/cpp/src/mip_heuristics/diversity/lns/rins.cu index 2fbe79ba34..cb0aa82ea0 100644 --- a/cpp/src/mip_heuristics/diversity/lns/rins.cu +++ b/cpp/src/mip_heuristics/diversity/lns/rins.cu @@ -223,8 +223,7 @@ void rins_t::run_rins() std::vector> rins_solution_queue; - mip_solver_context_t fj_context( - &rins_handle, &fixed_problem, context.settings, context.scaling); + mip_solver_context_t fj_context(&rins_handle, &fixed_problem, context.settings); fj_t fj(fj_context); solution_t fj_solution(fixed_problem); fj_solution.copy_new_assignment(cuopt::host_copy(fixed_assignment, rins_handle.get_stream())); diff --git a/cpp/src/mip_heuristics/diversity/population.cu b/cpp/src/mip_heuristics/diversity/population.cu index bca87223d9..bb0fdd6d11 100644 --- 
a/cpp/src/mip_heuristics/diversity/population.cu +++ b/cpp/src/mip_heuristics/diversity/population.cu @@ -265,10 +265,6 @@ void population_t::invoke_get_solution_callback( f_t user_bound = context.stats.get_solution_bound(); solution_t temp_sol(sol); problem_ptr->post_process_assignment(temp_sol.assignment); - if (context.settings.mip_scaling) { - rmm::device_uvector dummy(0, temp_sol.handle_ptr->get_stream()); - context.scaling.unscale_solutions(temp_sol.assignment, dummy); - } if (problem_ptr->has_papilo_presolve_data()) { problem_ptr->papilo_uncrush_assignment(temp_sol.assignment); } @@ -308,10 +304,8 @@ void population_t::run_solution_callbacks(solution_t& sol) invoke_get_solution_callback(sol, get_sol_callback); } } - // save the best objective here, because we might not have been able to return the solution to - // the user because of the unscaling that causes infeasibility. - // This prevents an issue of repaired, or a fully feasible solution being reported in the call - // back in next run. + // Save the best objective here even if callback handling later exits early. + // This prevents older solutions from being reported as "new best" in subsequent callbacks. 
best_feasible_objective = sol.get_objective(); } @@ -344,7 +338,6 @@ void population_t::run_solution_callbacks(solution_t& sol) incumbent_assignment.size(), sol.handle_ptr->get_stream()); - if (context.settings.mip_scaling) { context.scaling.scale_solutions(incumbent_assignment); } bool is_valid = problem_ptr->pre_process_assignment(incumbent_assignment); if (!is_valid) { return; } cuopt_assert(outside_sol.assignment.size() == incumbent_assignment.size(), diff --git a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh index b2f7f80066..e6bb49e6b9 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh @@ -14,6 +14,7 @@ #include #include #include +#include namespace cuopt::linear_programming::detail { diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu new file mode 100644 index 0000000000..8c9b168471 --- /dev/null +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -0,0 +1,806 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +constexpr int row_scaling_max_iterations = 8; +constexpr double row_scaling_min_initial_log2_spread = 12.0; +constexpr int row_scaling_factor_exponent = 5; +constexpr int row_scaling_big_m_soft_factor_exponent = 4; +constexpr double row_scaling_min_factor = + 1.0 / static_cast(std::uint64_t{1} << row_scaling_factor_exponent); +constexpr double row_scaling_max_factor = + static_cast(std::uint64_t{1} << row_scaling_factor_exponent); +constexpr double row_scaling_big_m_soft_min_factor = + 1.0 / static_cast(std::uint64_t{1} << row_scaling_big_m_soft_factor_exponent); +constexpr double row_scaling_big_m_soft_max_factor = 1.0; +constexpr double row_scaling_spread_rel_tol = 1.0e-2; +constexpr double integer_coefficient_rel_tol = 1.0e-6; +constexpr double integer_multiplier_rounding_tolerance = 1.0e-6; +constexpr double min_abs_objective_coefficient_threshold = 1.0e-2; +constexpr double max_obj_scaling_coefficient = 1.0e3; + +constexpr int cumulative_row_scaling_exponent = 8; +constexpr double cumulative_row_scaling_min = + 1.0 / static_cast(std::uint64_t{1} << cumulative_row_scaling_exponent); +constexpr double cumulative_row_scaling_max = + static_cast(std::uint64_t{1} << cumulative_row_scaling_exponent); + +constexpr double post_scaling_max_ratio_warn = 1.0e15; + +constexpr double big_m_abs_threshold = 1.0e4; +constexpr double big_m_ratio_threshold = 1.0e4; + +template +struct abs_value_transform_t { + __device__ f_t operator()(f_t value) const { return raft::abs(value); } +}; + +template +struct nonzero_abs_or_inf_transform_t { + __device__ f_t operator()(f_t value) const + { + const f_t abs_value = 
raft::abs(value); + return abs_value > f_t(0) ? abs_value : std::numeric_limits::infinity(); + } +}; + +template +struct nonzero_count_transform_t { + __device__ i_t operator()(f_t value) const { return raft::abs(value) > f_t(0) ? i_t(1) : i_t(0); } +}; + +template +struct max_op_t { + __host__ __device__ item_t operator()(const item_t& lhs, const item_t& rhs) const + { + return lhs > rhs ? lhs : rhs; + } +}; + +template +struct min_op_t { + __host__ __device__ item_t operator()(const item_t& lhs, const item_t& rhs) const + { + return lhs < rhs ? lhs : rhs; + } +}; + +struct gcd_op_t { + __host__ __device__ std::int64_t operator()(std::int64_t lhs, std::int64_t rhs) const + { + lhs = lhs < 0 ? -lhs : lhs; + rhs = rhs < 0 ? -rhs : rhs; + if (lhs == 0) { return rhs; } + if (rhs == 0) { return lhs; } + while (rhs != 0) { + const std::int64_t remainder = lhs % rhs; + lhs = rhs; + rhs = remainder; + } + return lhs; + } +}; + +template +struct integer_coeff_for_integer_var_transform_t { + __device__ std::int64_t operator()(thrust::tuple coeff_with_type) const + { + const f_t coefficient = thrust::get<0>(coeff_with_type); + const var_t var_type = thrust::get<1>(coeff_with_type); + if (var_type != var_t::INTEGER) { return std::int64_t{0}; } + + const f_t abs_coefficient = raft::abs(coefficient); + if (!isfinite(abs_coefficient) || abs_coefficient <= f_t(0)) { return std::int64_t{0}; } + + const f_t rounded_abs_coefficient = round(abs_coefficient); + const f_t tolerance_scale = abs_coefficient > f_t(1) ? 
abs_coefficient : f_t(1); + const f_t integrality_tolerance = + static_cast(integer_coefficient_rel_tol) * tolerance_scale; + if (raft::abs(abs_coefficient - rounded_abs_coefficient) > integrality_tolerance) { + return std::int64_t{0}; + } + if (rounded_abs_coefficient <= f_t(0) || + rounded_abs_coefficient > static_cast(std::numeric_limits::max())) { + return std::int64_t{0}; + } + return static_cast(rounded_abs_coefficient); + } +}; + +template +void compute_row_inf_norm( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_inf_norm, + rmm::cuda_stream_view stream_view) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + auto coeff_abs_iter = + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); + size_t current_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + current_bytes, + coeff_abs_iter, + row_inf_norm.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + max_op_t{}, + f_t(0), + stream_view)); +} + +template +void compute_row_integer_gcd( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_integer_gcd, + rmm::cuda_stream_view stream_view) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_indices = op_problem.get_constraint_matrix_indices(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + const auto& variable_types = op_problem.get_variable_types(); + if (variable_types.size() != static_cast(op_problem.get_n_variables())) { + thrust::fill(op_problem.get_handle_ptr()->get_thrust_policy(), + row_integer_gcd.begin(), + row_integer_gcd.end(), + 
std::int64_t{0}); + return; + } + auto variable_type_per_nnz = + thrust::make_permutation_iterator(variable_types.data(), matrix_indices.data()); + auto coeff_and_type_iter = + thrust::make_zip_iterator(thrust::make_tuple(matrix_values.data(), variable_type_per_nnz)); + auto integer_coeff_iter = thrust::make_transform_iterator( + coeff_and_type_iter, integer_coeff_for_integer_var_transform_t{}); + size_t current_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + current_bytes, + integer_coeff_iter, + row_integer_gcd.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + gcd_op_t{}, + std::int64_t{0}, + stream_view)); +} + +template +void compute_big_m_skip_rows( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_inf_norm, + rmm::device_uvector& row_min_nonzero, + rmm::device_uvector& row_nonzero_count, + rmm::device_uvector& row_skip_scaling) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + const auto stream_view = op_problem.get_handle_ptr()->get_stream(); + auto coeff_abs_iter = + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); + auto coeff_nonzero_min_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_abs_or_inf_transform_t{}); + auto coeff_nonzero_count_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_count_transform_t{}); + + size_t max_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + max_bytes, + coeff_abs_iter, + row_inf_norm.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + max_op_t{}, + f_t(0), + stream_view)); + size_t min_bytes = temp_storage_bytes; + 
RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + min_bytes, + coeff_nonzero_min_iter, + row_min_nonzero.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + min_op_t{}, + std::numeric_limits::infinity(), + stream_view)); + size_t count_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + count_bytes, + coeff_nonzero_count_iter, + row_nonzero_count.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + thrust::plus{}, + i_t(0), + stream_view)); + + auto row_begin = thrust::make_zip_iterator( + thrust::make_tuple(row_inf_norm.begin(), row_min_nonzero.begin(), row_nonzero_count.begin())); + auto row_end = thrust::make_zip_iterator( + thrust::make_tuple(row_inf_norm.end(), row_min_nonzero.end(), row_nonzero_count.end())); + thrust::transform( + op_problem.get_handle_ptr()->get_thrust_policy(), + row_begin, + row_end, + row_skip_scaling.begin(), + [] __device__(auto row_info) -> i_t { + const f_t row_norm = thrust::get<0>(row_info); + const f_t row_min_non_zero = thrust::get<1>(row_info); + const i_t row_non_zero_size = thrust::get<2>(row_info); + if (row_non_zero_size < i_t(2) || row_min_non_zero >= std::numeric_limits::infinity()) { + return i_t(0); + } + + const f_t row_ratio = row_norm / row_min_non_zero; + return row_norm >= static_cast(big_m_abs_threshold) && + row_ratio >= static_cast(big_m_ratio_threshold) + ? 
i_t(1) + : i_t(0); + }); +} + +template +void scale_objective(cuopt::linear_programming::optimization_problem_t& op_problem) +{ + auto& obj_coefficients = op_problem.get_objective_coefficients(); + const i_t n_cols = op_problem.get_n_variables(); + if (n_cols == 0) { return; } + + const auto* handle_ptr = op_problem.get_handle_ptr(); + + f_t min_abs_obj = thrust::transform_reduce(handle_ptr->get_thrust_policy(), + obj_coefficients.begin(), + obj_coefficients.end(), + nonzero_abs_or_inf_transform_t{}, + std::numeric_limits::infinity(), + min_op_t{}); + + f_t max_abs_obj = thrust::transform_reduce(handle_ptr->get_thrust_policy(), + obj_coefficients.begin(), + obj_coefficients.end(), + abs_value_transform_t{}, + f_t(0), + max_op_t{}); + + if (!std::isfinite(static_cast(min_abs_obj)) || min_abs_obj <= f_t(0) || + max_abs_obj <= f_t(0)) { + CUOPT_LOG_INFO("MIP_OBJ_SCALING skipped: no finite nonzero objective coefficients"); + return; + } + + if (static_cast(min_abs_obj) >= min_abs_objective_coefficient_threshold) { + CUOPT_LOG_INFO("MIP_OBJ_SCALING skipped: min_abs_coeff=%g already above threshold=%g", + static_cast(min_abs_obj), + min_abs_objective_coefficient_threshold); + return; + } + + double raw_scale = min_abs_objective_coefficient_threshold / static_cast(min_abs_obj); + double scale = std::min(raw_scale, max_obj_scaling_coefficient); + + double post_max = static_cast(max_abs_obj) * scale; + if (post_max > 1.0e6) { + CUOPT_LOG_INFO("MIP_OBJ_SCALING skipped: would push max_coeff from %g to %g (limit 1e6)", + static_cast(max_abs_obj), + post_max); + return; + } + + f_t scale_f = static_cast(scale); + thrust::transform(handle_ptr->get_thrust_policy(), + obj_coefficients.begin(), + obj_coefficients.end(), + obj_coefficients.begin(), + [scale_f] __device__(f_t c) -> f_t { return c * scale_f; }); + + f_t old_sf = op_problem.get_objective_scaling_factor(); + f_t old_off = op_problem.get_objective_offset(); + op_problem.set_objective_scaling_factor(old_sf / scale_f); + 
op_problem.set_objective_offset(old_off * scale_f); + + CUOPT_LOG_INFO( + "MIP_OBJ_SCALING applied: min_abs_coeff=%g max_abs_coeff=%g scale=%g new_scaling_factor=%g", + static_cast(min_abs_obj), + static_cast(max_abs_obj), + scale, + static_cast(old_sf / scale_f)); +} + +template +mip_scaling_strategy_t::mip_scaling_strategy_t( + typename mip_scaling_strategy_t::optimization_problem_type_t& op_problem_scaled) + : handle_ptr_(op_problem_scaled.get_handle_ptr()), + stream_view_(handle_ptr_->get_stream()), + op_problem_scaled_(op_problem_scaled) +{ +} + +template +size_t dry_run_cub(const cuopt::linear_programming::optimization_problem_t& op_problem, + i_t n_rows, + rmm::device_uvector& row_inf_norm, + rmm::device_uvector& row_min_nonzero, + rmm::device_uvector& row_nonzero_count, + rmm::device_uvector& row_integer_gcd, + rmm::cuda_stream_view stream_view) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_indices = op_problem.get_constraint_matrix_indices(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + const auto& variable_types = op_problem.get_variable_types(); + size_t temp_storage_bytes = 0; + size_t current_required_bytes = 0; + + auto coeff_abs_iter = + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_abs_iter, + row_inf_norm.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + max_op_t{}, + f_t(0), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + auto coeff_nonzero_min_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_abs_or_inf_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_nonzero_min_iter, + row_min_nonzero.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + min_op_t{}, + 
std::numeric_limits::infinity(), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + auto coeff_nonzero_count_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_count_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_nonzero_count_iter, + row_nonzero_count.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + thrust::plus{}, + i_t(0), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + if (variable_types.size() == static_cast(op_problem.get_n_variables())) { + auto variable_type_per_nnz = + thrust::make_permutation_iterator(variable_types.data(), matrix_indices.data()); + auto coeff_and_type_iter = + thrust::make_zip_iterator(thrust::make_tuple(matrix_values.data(), variable_type_per_nnz)); + auto integer_coeff_iter = thrust::make_transform_iterator( + coeff_and_type_iter, integer_coeff_for_integer_var_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + integer_coeff_iter, + row_integer_gcd.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + gcd_op_t{}, + std::int64_t{0}, + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + } + + return temp_storage_bytes; +} + +template +void mip_scaling_strategy_t::scale_problem() +{ + raft::common::nvtx::range fun_scope("mip_scale_problem"); + + auto& matrix_values = op_problem_scaled_.get_constraint_matrix_values(); + auto& matrix_offsets = op_problem_scaled_.get_constraint_matrix_offsets(); + auto& constraint_bounds = op_problem_scaled_.get_constraint_bounds(); + auto& constraint_lower_bounds = op_problem_scaled_.get_constraint_lower_bounds(); + auto& constraint_upper_bounds = op_problem_scaled_.get_constraint_upper_bounds(); + const i_t n_rows = op_problem_scaled_.get_n_constraints(); + const i_t n_cols = 
op_problem_scaled_.get_n_variables(); + const i_t nnz = op_problem_scaled_.get_nnz(); + + scale_objective(op_problem_scaled_); + + if (n_rows == 0 || nnz <= 0) { return; } + cuopt_assert(constraint_lower_bounds.size() == static_cast(n_rows), + "constraint_lower_bounds must be provided for row scaling"); + cuopt_assert(constraint_upper_bounds.size() == static_cast(n_rows), + "constraint_upper_bounds must be provided for row scaling"); + cuopt_assert(constraint_bounds.size() == size_t{0} || + constraint_bounds.size() == static_cast(n_rows), + "constraint_bounds must be empty or have one value per constraint"); + + rmm::device_uvector row_inf_norm(static_cast(n_rows), stream_view_); + rmm::device_uvector row_min_nonzero(static_cast(n_rows), stream_view_); + rmm::device_uvector row_nonzero_count(static_cast(n_rows), stream_view_); + rmm::device_uvector row_integer_gcd(static_cast(n_rows), stream_view_); + rmm::device_uvector row_rhs_magnitude(static_cast(n_rows), stream_view_); + rmm::device_uvector row_skip_scaling(static_cast(n_rows), stream_view_); + thrust::fill( + handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(0)); + rmm::device_uvector iteration_scaling(static_cast(n_rows), stream_view_); + rmm::device_uvector cumulative_scaling(static_cast(n_rows), stream_view_); + thrust::fill( + handle_ptr_->get_thrust_policy(), cumulative_scaling.begin(), cumulative_scaling.end(), f_t(1)); + rmm::device_uvector coefficient_row_index(static_cast(nnz), stream_view_); + rmm::device_uvector ref_log2_values(static_cast(n_rows), stream_view_); + + thrust::upper_bound(handle_ptr_->get_thrust_policy(), + matrix_offsets.begin(), + matrix_offsets.end(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(nnz), + coefficient_row_index.begin()); + thrust::transform( + handle_ptr_->get_thrust_policy(), + coefficient_row_index.begin(), + coefficient_row_index.end(), + coefficient_row_index.begin(), + [] __device__(i_t 
row_upper_bound_idx) -> i_t { return row_upper_bound_idx - 1; }); + + size_t temp_storage_bytes = dry_run_cub(op_problem_scaled_, + n_rows, + row_inf_norm, + row_min_nonzero, + row_nonzero_count, + row_integer_gcd, + stream_view_); + + rmm::device_uvector temp_storage(temp_storage_bytes, stream_view_); + compute_big_m_skip_rows(op_problem_scaled_, + temp_storage, + temp_storage_bytes, + row_inf_norm, + row_min_nonzero, + row_nonzero_count, + row_skip_scaling); + + i_t big_m_rows = thrust::count( + handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(1)); + + CUOPT_LOG_INFO("MIP row scaling start: rows=%d cols=%d max_iterations=%d soft_big_m_rows=%d", + n_rows, + n_cols, + row_scaling_max_iterations, + big_m_rows); + + double previous_row_log2_spread = std::numeric_limits::infinity(); + for (int iteration = 0; iteration < row_scaling_max_iterations; ++iteration) { + compute_row_inf_norm( + op_problem_scaled_, temp_storage, temp_storage_bytes, row_inf_norm, stream_view_); + compute_row_integer_gcd( + op_problem_scaled_, temp_storage, temp_storage_bytes, row_integer_gcd, stream_view_); + + using row_stats_t = thrust::tuple; + auto row_norm_log2_stats = thrust::transform_reduce( + handle_ptr_->get_thrust_policy(), + row_inf_norm.begin(), + row_inf_norm.end(), + [] __device__(f_t row_norm) -> row_stats_t { + if (row_norm == f_t(0)) { + return {0.0, + 0.0, + std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}; + } + const double row_log2 = log2(static_cast(row_norm)); + return {row_log2, 1.0, row_log2, row_log2}; + }, + row_stats_t{0.0, + 0.0, + std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}, + [] __device__(row_stats_t a, row_stats_t b) -> row_stats_t { + return {thrust::get<0>(a) + thrust::get<0>(b), + thrust::get<1>(a) + thrust::get<1>(b), + min_op_t{}(thrust::get<2>(a), thrust::get<2>(b)), + max_op_t{}(thrust::get<3>(a), thrust::get<3>(b))}; + }); + const i_t active_row_count = 
static_cast(thrust::get<1>(row_norm_log2_stats)); + if (active_row_count == 0) { break; } + const double row_log2_spread = + thrust::get<3>(row_norm_log2_stats) - thrust::get<2>(row_norm_log2_stats); + if (iteration == 0 && row_log2_spread <= row_scaling_min_initial_log2_spread) { + CUOPT_LOG_INFO("MIP row scaling skipped: initial_log2_spread=%g threshold=%g", + row_log2_spread, + row_scaling_min_initial_log2_spread); + break; + } + if (std::isfinite(previous_row_log2_spread)) { + const double spread_improvement = previous_row_log2_spread - row_log2_spread; + if (spread_improvement <= + row_scaling_spread_rel_tol * std::max(1.0, previous_row_log2_spread)) { + break; + } + } + previous_row_log2_spread = row_log2_spread; + + thrust::transform(handle_ptr_->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple( + constraint_lower_bounds.begin(), constraint_upper_bounds.begin())), + thrust::make_zip_iterator(thrust::make_tuple(constraint_lower_bounds.end(), + constraint_upper_bounds.end())), + row_rhs_magnitude.begin(), + [] __device__(auto row_bounds) -> f_t { + const f_t lower_bound = thrust::get<0>(row_bounds); + const f_t upper_bound = thrust::get<1>(row_bounds); + f_t rhs_norm = f_t(0); + if (isfinite(lower_bound)) { rhs_norm = raft::abs(lower_bound); } + if (isfinite(upper_bound)) { + const f_t upper_abs = raft::abs(upper_bound); + rhs_norm = upper_abs > rhs_norm ? 
upper_abs : rhs_norm; + } + return rhs_norm; + }); + + // Fix A+C: median-based target norm excluding big-M rows (robust to outliers) + constexpr double neg_inf_sentinel = -1.0e300; + thrust::transform(handle_ptr_->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple( + row_inf_norm.begin(), row_rhs_magnitude.begin(), row_skip_scaling.begin())), + thrust::make_zip_iterator(thrust::make_tuple( + row_inf_norm.end(), row_rhs_magnitude.end(), row_skip_scaling.end())), + ref_log2_values.begin(), + [neg_inf_sentinel] __device__(auto row_info) -> double { + const f_t row_norm = thrust::get<0>(row_info); + const f_t rhs_norm = thrust::get<1>(row_info); + const i_t is_big_m = thrust::get<2>(row_info); + if (is_big_m) { return neg_inf_sentinel - 1.0; } + const f_t reference_norm = row_norm > rhs_norm ? row_norm : rhs_norm; + if (reference_norm <= f_t(0)) { return neg_inf_sentinel - 1.0; } + return log2(static_cast(reference_norm)); + }); + thrust::sort(handle_ptr_->get_thrust_policy(), ref_log2_values.begin(), ref_log2_values.end()); + auto valid_begin_iter = thrust::lower_bound(handle_ptr_->get_thrust_policy(), + ref_log2_values.begin(), + ref_log2_values.end(), + neg_inf_sentinel); + i_t n_invalid = static_cast(valid_begin_iter - ref_log2_values.begin()); + i_t valid_count = n_rows - n_invalid; + if (valid_count == 0) { break; } + i_t median_idx = n_invalid + valid_count / 2; + double h_median_log2; + RAFT_CUDA_TRY(cudaMemcpyAsync(&h_median_log2, + ref_log2_values.data() + median_idx, + sizeof(double), + cudaMemcpyDeviceToHost, + stream_view_)); + handle_ptr_->sync_stream(); + f_t target_norm = static_cast(exp2(h_median_log2)); + cuopt_assert(std::isfinite(static_cast(target_norm)), "target_norm must be finite"); + cuopt_assert(target_norm > f_t(0), "target_norm must be positive"); + + thrust::transform( + handle_ptr_->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.begin(), + row_skip_scaling.begin(), + 
row_integer_gcd.begin(), + cumulative_scaling.begin())), + thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.end(), + row_skip_scaling.end(), + row_integer_gcd.end(), + cumulative_scaling.end())), + iteration_scaling.begin(), + [target_norm] __device__(auto row_info) -> f_t { + const f_t row_norm = thrust::get<0>(row_info); + const i_t is_big_m = thrust::get<1>(row_info); + const std::int64_t row_coeff_gcd = thrust::get<2>(row_info); + const f_t cum_scale = thrust::get<3>(row_info); + if (row_norm == f_t(0)) { return f_t(1); } + + const f_t desired_scaling = target_norm / row_norm; + if (!isfinite(desired_scaling) || desired_scaling <= f_t(0)) { return f_t(1); } + + f_t min_scaling = is_big_m ? static_cast(row_scaling_big_m_soft_min_factor) + : static_cast(row_scaling_min_factor); + f_t max_scaling = is_big_m ? static_cast(row_scaling_big_m_soft_max_factor) + : static_cast(row_scaling_max_factor); + + // Fix B: prevent scaling UP rows that are already numerically large + if (!is_big_m && row_norm >= static_cast(big_m_abs_threshold)) { + if (max_scaling > f_t(1)) { max_scaling = f_t(1); } + } + + const f_t cum_lower = static_cast(cumulative_row_scaling_min) / cum_scale; + const f_t cum_upper = static_cast(cumulative_row_scaling_max) / cum_scale; + if (cum_lower > min_scaling) { min_scaling = cum_lower; } + if (cum_upper < max_scaling) { max_scaling = cum_upper; } + if (min_scaling > max_scaling) { return f_t(1); } + + f_t row_scaling = desired_scaling; + if (row_scaling < min_scaling) { row_scaling = min_scaling; } + if (row_scaling > max_scaling) { row_scaling = max_scaling; } + + // Fix E: prefer power-of-two scaling for integer rows (exact in IEEE 754) + if (row_coeff_gcd > std::int64_t{0}) { + const f_t gcd_value = static_cast(row_coeff_gcd); + if (isfinite(gcd_value) && gcd_value > f_t(0)) { + const double log2_scaling = log2(static_cast(row_scaling)); + int k_candidates[3] = {static_cast(round(log2_scaling)), + static_cast(floor(log2_scaling)), + 
static_cast(ceil(log2_scaling))}; + bool found_pow2 = false; + for (int ci = 0; ci < 3 && !found_pow2; ++ci) { + int k = k_candidates[ci]; + f_t pow2 = static_cast(exp2(static_cast(k))); + if (pow2 < min_scaling || pow2 > max_scaling) { continue; } + bool preserves = + (k >= 0) || (-k < 63 && (row_coeff_gcd % (std::int64_t{1} << (-k))) == 0); + if (preserves) { + row_scaling = pow2; + found_pow2 = true; + } + } + if (!found_pow2) { + std::int64_t min_mult = static_cast( + ceil(static_cast(min_scaling * gcd_value - + static_cast(integer_multiplier_rounding_tolerance)))); + std::int64_t max_mult = static_cast(floor( + static_cast(max_scaling * gcd_value + + static_cast(integer_multiplier_rounding_tolerance)))); + if (min_mult < std::int64_t{1}) { min_mult = std::int64_t{1}; } + if (max_mult < min_mult) { max_mult = min_mult; } + std::int64_t proj_mult = static_cast(round(row_scaling * gcd_value)); + if (proj_mult < min_mult) { proj_mult = min_mult; } + if (proj_mult > max_mult) { proj_mult = max_mult; } + row_scaling = static_cast(proj_mult) / gcd_value; + } + } + } + return row_scaling; + }); + + i_t scaled_rows = + thrust::count_if(handle_ptr_->get_thrust_policy(), + iteration_scaling.begin(), + iteration_scaling.end(), + [] __device__(f_t row_scale) -> bool { return row_scale != f_t(1); }); + CUOPT_LOG_INFO( + "MIP_SCALING_METRICS iteration=%d log2_spread=%g target_norm=%g scaled_rows=%d " + "valid_rows=%d", + iteration, + row_log2_spread, + static_cast(target_norm), + scaled_rows, + valid_count); + if (scaled_rows == 0) { break; } + + thrust::transform( + handle_ptr_->get_thrust_policy(), + matrix_values.begin(), + matrix_values.end(), + thrust::make_permutation_iterator(iteration_scaling.begin(), coefficient_row_index.begin()), + matrix_values.begin(), + thrust::multiplies{}); + + thrust::transform(handle_ptr_->get_thrust_policy(), + cumulative_scaling.begin(), + cumulative_scaling.end(), + iteration_scaling.begin(), + cumulative_scaling.begin(), + 
thrust::multiplies{}); + + thrust::transform(handle_ptr_->get_thrust_policy(), + constraint_lower_bounds.begin(), + constraint_lower_bounds.end(), + iteration_scaling.begin(), + constraint_lower_bounds.begin(), + thrust::multiplies{}); + thrust::transform(handle_ptr_->get_thrust_policy(), + constraint_upper_bounds.begin(), + constraint_upper_bounds.end(), + iteration_scaling.begin(), + constraint_upper_bounds.begin(), + thrust::multiplies{}); + if (constraint_bounds.size() == static_cast(n_rows)) { + thrust::transform(handle_ptr_->get_thrust_policy(), + constraint_bounds.begin(), + constraint_bounds.end(), + iteration_scaling.begin(), + constraint_bounds.begin(), + thrust::multiplies{}); + } + } + + CUOPT_LOG_INFO("MIP_SCALING_SUMMARY rows=%d bigm_rows=%d final_spread=%g", + n_rows, + big_m_rows, + previous_row_log2_spread); + + const f_t post_max_coeff = thrust::transform_reduce(handle_ptr_->get_thrust_policy(), + matrix_values.begin(), + matrix_values.end(), + abs_value_transform_t{}, + f_t(0), + max_op_t{}); + const f_t post_min_nonzero_coeff = thrust::transform_reduce(handle_ptr_->get_thrust_policy(), + matrix_values.begin(), + matrix_values.end(), + nonzero_abs_or_inf_transform_t{}, + std::numeric_limits::infinity(), + min_op_t{}); + if (std::isfinite(static_cast(post_max_coeff)) && + std::isfinite(static_cast(post_min_nonzero_coeff)) && + post_min_nonzero_coeff > f_t(0)) { + const double post_ratio = + static_cast(post_max_coeff) / static_cast(post_min_nonzero_coeff); + if (post_ratio > post_scaling_max_ratio_warn) { + CUOPT_LOG_WARN( + "MIP row scaling: extreme coefficient ratio after scaling: max=%g min_nz=%g ratio=%g", + static_cast(post_max_coeff), + static_cast(post_min_nonzero_coeff), + post_ratio); + } + } + + CUOPT_LOG_INFO("MIP row scaling completed"); + op_problem_scaled_.print_scaling_information(); +} + +#define INSTANTIATE(F_TYPE) template class mip_scaling_strategy_t; + +#if MIP_INSTANTIATE_FLOAT +INSTANTIATE(float) +#endif + +#if 
MIP_INSTANTIATE_DOUBLE +INSTANTIATE(double) +#endif + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cuh b/cpp/src/mip_heuristics/mip_scaling_strategy.cuh new file mode 100644 index 0000000000..0fc83b8cb9 --- /dev/null +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cuh @@ -0,0 +1,32 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include + +#include + +#include + +namespace cuopt::linear_programming::detail { + +template +class mip_scaling_strategy_t { + public: + using optimization_problem_type_t = cuopt::linear_programming::optimization_problem_t; + explicit mip_scaling_strategy_t(optimization_problem_type_t& op_problem_scaled); + + void scale_problem(); + + private: + raft::handle_t const* handle_ptr_{nullptr}; + rmm::cuda_stream_view stream_view_; + optimization_problem_type_t& op_problem_scaled_; +}; + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp index 20a586f6fb..85e495608f 100644 --- a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp +++ b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp @@ -669,6 +669,10 @@ std::optional> third_party_presolve_t( papilo_problem, op_problem.get_handle_ptr(), category, maximize_); + // metadata from original optimization problem that is not filled + opt_problem.set_problem_name(op_problem.get_problem_name()); + opt_problem.set_objective_scaling_factor(op_problem.get_objective_scaling_factor()); + // when an objective offset outside (e.g. 
from mps file), handle accordingly auto col_flags = papilo_problem.getColFlags(); std::vector implied_integer_indices; for (size_t i = 0; i < col_flags.size(); i++) { diff --git a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu index e2bbc8feb1..84415f5372 100644 --- a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu +++ b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu @@ -8,7 +8,6 @@ #include "relaxed_lp.cuh" #include -#include #include #include #include diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index f5a2172f2e..8733ecfbab 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -9,13 +9,13 @@ #include #include +#include #include #include #include #include #include -#include #include #include #include @@ -64,12 +64,7 @@ mip_solution_t run_mip(detail::problem_t& problem, timer_t& timer) { raft::common::nvtx::range fun_scope("run_mip"); - auto constexpr const running_mip = true; - // TODO ask Akif and Alice how was this passed down? 
- auto hyper_params = settings.hyper_params; - hyper_params.update_primal_weight_on_initial_solution = false; - hyper_params.update_step_size_on_initial_solution = true; if (settings.get_mip_callbacks().size() > 0) { auto callback_num_variables = problem.original_problem_ptr->get_n_variables(); if (problem.has_papilo_presolve_data()) { @@ -133,33 +128,12 @@ mip_solution_t run_mip(detail::problem_t& problem, "Size mismatch"); cuopt_assert(problem.original_problem_ptr->get_n_constraints() == scaled_problem.n_constraints, "Size mismatch"); - detail::pdlp_initial_scaling_strategy_t scaling( - scaled_problem.handle_ptr, - scaled_problem, - hyper_params.default_l_inf_ruiz_iterations, - (f_t)hyper_params.default_alpha_pock_chambolle_rescaling, - scaled_problem.reverse_coefficients, - scaled_problem.reverse_offsets, - scaled_problem.reverse_constraints, - nullptr, - hyper_params, - running_mip); - - cuopt_func_call(auto saved_problem = scaled_problem); - if (settings.mip_scaling) { - scaling.scale_problem(); - if (settings.initial_solutions.size() > 0) { - for (const auto& initial_solution : settings.initial_solutions) { - scaling.scale_primal(*initial_solution); - } - } - } + // only call preprocess on scaled problem, so we can compute feasibility on the original problem scaled_problem.preprocess_problem(); - // cuopt_func_call((check_scaled_problem(scaled_problem, saved_problem))); detail::trivial_presolve(scaled_problem); - detail::mip_solver_t solver(scaled_problem, settings, scaling, timer); + detail::mip_solver_t solver(scaled_problem, settings, timer); if (timer.check_time_limit()) { CUOPT_LOG_INFO("Time limit reached before main solve"); detail::solution_t sol(problem); @@ -170,17 +144,16 @@ mip_solution_t run_mip(detail::problem_t& problem, auto scaled_sol = solver.run_solver(); bool is_feasible_before_scaling = scaled_sol.get_feasible(); scaled_sol.problem_ptr = &problem; - if (settings.mip_scaling) { scaling.unscale_solutions(scaled_sol); } // at this point we 
need to compute the feasibility on the original problem not the presolved one bool is_feasible_after_unscaling = scaled_sol.compute_feasibility(); if (!scaled_problem.empty && is_feasible_before_scaling != is_feasible_after_unscaling) { CUOPT_LOG_WARN( - "The feasibility does not match on scaled and unscaled problems. To overcome this issue, " + "The feasibility does not match on scaled and original problems. To overcome this issue, " "please provide a more numerically stable problem."); } auto sol = scaled_sol.get_solution( - is_feasible_before_scaling || is_feasible_after_unscaling, solver.get_solver_stats(), false); + is_feasible_after_unscaling || is_feasible_before_scaling, solver.get_solver_stats(), false); int hidesol = std::getenv("CUOPT_MIP_HIDE_SOLUTION") ? atoi(std::getenv("CUOPT_MIP_HIDE_SOLUTION")) : 0; @@ -241,7 +214,10 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, } auto timer = timer_t(time_limit); - + if (settings.mip_scaling) { + detail::mip_scaling_strategy_t scaling(op_problem); + scaling.scale_problem(); + } double presolve_time = 0.0; std::unique_ptr> presolver; std::optional> presolve_result; diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index e6f6d50b62..387c2926af 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -41,14 +41,10 @@ static void init_handler(const raft::handle_t* handle_ptr) template mip_solver_t::mip_solver_t(const problem_t& op_problem, const mip_solver_settings_t& solver_settings, - pdlp_initial_scaling_strategy_t& scaling, timer_t timer) : op_problem_(op_problem), solver_settings_(solver_settings), - context(op_problem.handle_ptr, - const_cast*>(&op_problem), - solver_settings, - scaling), + context(op_problem.handle_ptr, const_cast*>(&op_problem), solver_settings), timer_(timer) { init_handler(op_problem.handle_ptr); diff --git a/cpp/src/mip_heuristics/solver.cuh b/cpp/src/mip_heuristics/solver.cuh index 1b5fe17244..9b9024a1dc 100644 
--- a/cpp/src/mip_heuristics/solver.cuh +++ b/cpp/src/mip_heuristics/solver.cuh @@ -20,7 +20,6 @@ class mip_solver_t { public: explicit mip_solver_t(const problem_t& op_problem, const mip_solver_settings_t& solver_settings, - pdlp_initial_scaling_strategy_t& scaling, timer_t timer); solution_t run_solver(); diff --git a/cpp/src/mip_heuristics/solver_context.cuh b/cpp/src/mip_heuristics/solver_context.cuh index baac1dd9d6..d68a19bb61 100644 --- a/cpp/src/mip_heuristics/solver_context.cuh +++ b/cpp/src/mip_heuristics/solver_context.cuh @@ -9,7 +9,6 @@ #include #include -#include #include #include @@ -34,9 +33,8 @@ template struct mip_solver_context_t { explicit mip_solver_context_t(raft::handle_t const* handle_ptr_, problem_t* problem_ptr_, - mip_solver_settings_t settings_, - pdlp_initial_scaling_strategy_t& scaling) - : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_), scaling(scaling) + mip_solver_settings_t settings_) + : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_) { cuopt_assert(problem_ptr != nullptr, "problem_ptr is nullptr"); stats.set_solution_bound(problem_ptr->maximize ? 
std::numeric_limits::infinity() @@ -53,7 +51,6 @@ struct mip_solver_context_t { diversity_manager_t* diversity_manager_ptr{nullptr}; std::atomic preempt_heuristic_solver_ = false; const mip_solver_settings_t settings; - pdlp_initial_scaling_strategy_t& scaling; solver_stats_t stats; // Work limit context for tracking work units in deterministic mode (shared across all timers in // GPU heuristic loop) diff --git a/cpp/tests/mip/feasibility_jump_tests.cu b/cpp/tests/mip/feasibility_jump_tests.cu index baa3e9b803..4e8a518522 100644 --- a/cpp/tests/mip/feasibility_jump_tests.cu +++ b/cpp/tests/mip/feasibility_jump_tests.cu @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -77,16 +78,7 @@ static fj_state_t run_fj(std::string test_instance, // run the problem constructor of MIP, so that we do bounds standardization detail::problem_t problem(op_problem); problem.preprocess_problem(); - detail::pdhg_solver_t pdhg_solver(problem.handle_ptr, problem); - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 10, - 1.0, - pdhg_solver, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - true); + detail::mip_scaling_strategy_t scaling(problem); auto settings = mip_solver_settings_t{}; settings.time_limit = 30.; diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu index 5e2f08007d..1f825a26f7 100644 --- a/cpp/tests/mip/load_balancing_test.cu +++ b/cpp/tests/mip/load_balancing_test.cu @@ -9,11 +9,11 @@ #include "mip_utils.cuh" #include +#include #include #include #include #include -#include #include #include #include @@ -128,16 +128,7 @@ void test_multi_probe(std::string path) problem_checking_t::check_problem_representation(op_problem); detail::problem_t problem(op_problem); mip_solver_settings_t default_settings{}; - detail::pdhg_solver_t pdhg_solver(problem.handle_ptr, problem); - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 
10, - 1.0, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - nullptr, - true); + detail::mip_scaling_strategy_t scaling(problem); detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); detail::load_balanced_problem_t lb_problem(problem); detail::load_balanced_bounds_presolve_t lb_prs(lb_problem, solver.context); diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index 073c153486..003220de9b 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -9,10 +9,10 @@ #include "mip_utils.cuh" #include +#include #include #include #include -#include #include #include #include @@ -150,18 +150,7 @@ void test_multi_probe(std::string path) problem_checking_t::check_problem_representation(op_problem); detail::problem_t problem(op_problem); mip_solver_settings_t default_settings{}; - pdlp_hyper_params::pdlp_hyper_params_t hyper_params{}; - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 10, - 1.0, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - nullptr, - hyper_params, - true); - detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); + detail::mip_solver_t solver(problem, default_settings, cuopt::timer_t(0)); detail::bound_presolve_t bnd_prb_0(solver.context); detail::bound_presolve_t bnd_prb_1(solver.context); detail::multi_probe_t multi_probe_prs(solver.context); diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py index 59ea62089d..8412c745b5 100644 --- a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py +++ b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py @@ -450,11 +450,6 @@ class SolverConfig(BaseModel): description="Set True to run heuristics only, False to run " "heuristics 
and branch and bound for MILP", ) - mip_batch_pdlp_strong_branching: Optional[int] = Field( - default=0, - description="Set 1 to enable batch PDLP strong branching " - "in the MIP solver, 0 to disable.", - ) num_cpu_threads: Optional[int] = Field( default=None, description="Set the number of CPU threads to use for branch and bound.", # noqa