diff --git a/benchmarks/linear_programming/cuopt/parse_scaling_logs.py b/benchmarks/linear_programming/cuopt/parse_scaling_logs.py new file mode 100644 index 0000000000..7e46c0c3e4 --- /dev/null +++ b/benchmarks/linear_programming/cuopt/parse_scaling_logs.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +"""Parse MIP scaling benchmark logs into CSV. + +Reads log output (from stdin or file arguments) produced by the cuOpt MIP +solver with the MIP_SCALING_METRICS / MIP_SCALING_SUMMARY / MIP_OBJ_SCALING / +MIP_GAP_METRICS log lines and the standard solver output. + +Usage: + python parse_scaling_logs.py < benchmark_output.log > results.csv + python parse_scaling_logs.py log1.log log2.log > results.csv +""" + +import csv +import re +import sys +from collections import defaultdict + + +def _float_or_na(s): + try: + return float(s) + except (ValueError, TypeError): + return "" + + +def _int_or_na(s): + try: + return int(s) + except (ValueError, TypeError): + return "" + + +INSTANCE_RE = re.compile( + r"(?:Reading|Solving|instance[=:]\s*)(\S+?)(?:\.mps)?(?:\s|$)", re.I +) +FEASIBLE_RE = re.compile( + r"Found new best solution.*objective[=:\s]+([\d.eE+\-]+)", re.I +) +OPTIMAL_RE = re.compile(r"Optimal solution found", re.I) +OBJ_RE = re.compile(r"Objective\s+([\d.eE+\-]+)", re.I) +REL_GAP_RE = re.compile(r"relative_mip_gap\s+([\d.eE+\-]+)", re.I) +SIMPLEX_RE = re.compile(r"simplex_iterations\s+([\d,]+)", re.I) +NODES_RE = re.compile(r"Explored\s+(\d+)\s+nodes\s+in\s+([\d.]+)s", re.I) +INFEASIBLE_RE = re.compile(r"Integer infeasible|Infeasible", re.I) + +SCALING_METRICS_RE = re.compile( + r"MIP_SCALING_METRICS\s+iteration=(\d+)\s+log2_spread=([\d.eE+\-]+)\s+" + r"target_norm=([\d.eE+\-]+)\s+scaled_rows=(\d+)\s+valid_rows=(\d+)" +) +SCALING_SUMMARY_RE = re.compile( + 
r"MIP_SCALING_SUMMARY\s+rows=(\d+)\s+bigm_rows=(\d+)\s+final_spread=([\d.eE+\-inf]+)" +) +OBJ_SCALING_RE = re.compile( + r"MIP_OBJ_SCALING\s+(applied|skipped).*?(?:scale=([\d.eE+\-]+))?" + r".*?(?:new_scaling_factor=([\d.eE+\-]+))?" +) +GAP_METRICS_RE = re.compile( + r"MIP_GAP_METRICS\s+abs_gap_user=([\d.eE+\-]+)\s+rel_gap=([\d.eE+\-]+)\s+" + r"obj_user=([\d.eE+\-]+)\s+bound_user=([\d.eE+\-]+)\s+obj_scale=([\d.eE+\-]+)" +) +SCALING_SKIPPED_RE = re.compile(r"MIP row scaling skipped", re.I) +SOL_FOUND_RE = re.compile(r"sol_found=(\d+).*?obj_val=([\d.eE+\-inf]+)", re.I) +RUN_MPS_RE = re.compile(r"run_mps\s+(\S+)", re.I) + + +def parse_logs(lines): + records = [] + cur = defaultdict(lambda: "") + instance_name = "" + + def flush(): + nonlocal instance_name + if instance_name: + cur["instance"] = instance_name + records.append(dict(cur)) + cur.clear() + instance_name = "" + + for line in lines: + line = line.rstrip("\n") + + m = RUN_MPS_RE.search(line) + if m: + flush() + instance_name = m.group(1).replace(".mps", "") + continue + + m = INSTANCE_RE.search(line) + if m and not instance_name: + instance_name = m.group(1) + + m = SCALING_SKIPPED_RE.search(line) + if m: + cur["scaling_applied"] = "no" + + m = SCALING_METRICS_RE.search(line) + if m: + cur["scaling_applied"] = "yes" + cur["scaling_last_iteration"] = m.group(1) + cur["scaling_last_spread"] = m.group(2) + cur["scaling_target_norm"] = m.group(3) + + m = SCALING_SUMMARY_RE.search(line) + if m: + cur["rows"] = m.group(1) + cur["bigm_rows"] = m.group(2) + cur["final_spread"] = m.group(3) + + m = OBJ_SCALING_RE.search(line) + if m: + cur["obj_scaling_status"] = m.group(1) + if m.group(2): + cur["obj_scaling_factor"] = m.group(2) + if m.group(3): + cur["obj_new_scaling_factor"] = m.group(3) + + m = FEASIBLE_RE.search(line) + if m: + cur["feasible"] = 1 + cur["objective"] = m.group(1) + + m = OPTIMAL_RE.search(line) + if m: + cur["optimal"] = 1 + + m = NODES_RE.search(line) + if m: + cur["nodes_explored"] = m.group(1) 
+ cur["solve_time_s"] = m.group(2) + + m = SIMPLEX_RE.search(line) + if m: + cur["simplex_iters"] = m.group(1).replace(",", "") + + m = GAP_METRICS_RE.search(line) + if m: + cur["abs_gap_user"] = m.group(1) + cur["rel_gap"] = m.group(2) + cur["obj_user"] = m.group(3) + cur["bound_user"] = m.group(4) + cur["obj_scale"] = m.group(5) + + m = SOL_FOUND_RE.search(line) + if m: + cur["feasible"] = int(m.group(1)) + cur["objective"] = m.group(2) + + m = INFEASIBLE_RE.search(line) + if m: + cur["feasible"] = 0 + + flush() + return records + + +COLUMNS = [ + "instance", + "feasible", + "optimal", + "objective", + "rel_gap", + "abs_gap_user", + "obj_user", + "bound_user", + "solve_time_s", + "simplex_iters", + "nodes_explored", + "scaling_applied", + "bigm_rows", + "rows", + "final_spread", + "scaling_last_iteration", + "scaling_target_norm", + "obj_scaling_status", + "obj_scaling_factor", + "obj_new_scaling_factor", + "obj_scale", +] + + +def main(): + if len(sys.argv) > 1: + lines = [] + for path in sys.argv[1:]: + with open(path) as f: + lines.extend(f.readlines()) + else: + lines = sys.stdin.readlines() + + records = parse_logs(lines) + + writer = csv.DictWriter( + sys.stdout, fieldnames=COLUMNS, extrasaction="ignore" + ) + writer.writeheader() + for rec in records: + writer.writerow(rec) + + n_feasible = sum(1 for r in records if r.get("feasible") == 1) + n_optimal = sum(1 for r in records if r.get("optimal") == 1) + n_total = len(records) + print( + f"# Summary: {n_total} instances, {n_feasible} feasible, {n_optimal} optimal", + file=sys.stderr, + ) + + +if __name__ == "__main__": + main() diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index 9b79dff8af..e2a244844b 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -209,6 +209,7 @@ int run_single_file(std::string file_path, settings.tolerances.absolute_tolerance = 1e-6; settings.presolver = 
cuopt::linear_programming::presolver_t::Default; settings.reliability_branching = reliability_branching; + settings.mip_scaling = true; settings.seed = 42; cuopt::linear_programming::benchmark_info_t benchmark_info; settings.benchmark_info_ptr = &benchmark_info; diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 6d32cd5ed9..60534a36fa 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -107,7 +107,7 @@ class mip_solver_settings_t { /** Initial primal solutions */ std::vector>> initial_solutions; - bool mip_scaling = false; + bool mip_scaling = true; presolver_t presolver{presolver_t::Default}; /** * @brief Determinism mode for MIP solver. diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 6ce9a4f4d0..790e6b399d 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -695,9 +695,9 @@ void branch_and_bound_t::set_final_solution(mip_solution_t& settings_.heuristic_preemption_callback(); } - f_t gap = upper_bound_ - lower_bound; f_t obj = compute_user_objective(original_lp_, upper_bound_.load()); f_t user_bound = compute_user_objective(original_lp_, lower_bound); + f_t gap = std::abs(obj - user_bound); f_t gap_rel = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); bool is_maximization = original_lp_.obj_scale < 0.0; @@ -709,6 +709,13 @@ void branch_and_bound_t::set_final_solution(mip_solution_t& obj, is_maximization ? 
"Upper" : "Lower", user_bound); + settings_.log.printf( + "MIP_GAP_METRICS abs_gap_user=%e rel_gap=%e obj_user=%.16e bound_user=%.16e obj_scale=%e\n", + gap, + gap_rel, + obj, + user_bound, + original_lp_.obj_scale); if (gap <= settings_.absolute_mip_gap_tol || gap_rel <= settings_.relative_mip_gap_tol) { solver_status_ = mip_status_t::OPTIMAL; @@ -1155,7 +1162,10 @@ std::pair branch_and_bound_t::upd Policy& policy) { constexpr f_t inf = std::numeric_limits::infinity(); - const f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10; + const f_t obj_scale_mag = std::abs(original_lp_.obj_scale); + const f_t abs_fathom_tol = obj_scale_mag > f_t(0) + ? settings_.absolute_mip_gap_tol / (obj_scale_mag * f_t(10)) + : settings_.absolute_mip_gap_tol / f_t(10); lp_problem_t& leaf_problem = worker->leaf_problem; lp_solution_t& leaf_solution = worker->leaf_solution; const f_t upper_bound = policy.upper_bound(); @@ -1606,7 +1616,8 @@ void branch_and_bound_t::run_scheduler() #endif f_t lower_bound = get_lower_bound(); - f_t abs_gap = upper_bound_ - lower_bound; + f_t abs_gap = std::abs(compute_user_objective(original_lp_, upper_bound_.load()) - + compute_user_objective(original_lp_, lower_bound)); f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); i_t last_node_depth = 0; i_t last_int_infeas = 0; @@ -1616,7 +1627,8 @@ void branch_and_bound_t::run_scheduler() (active_workers_per_strategy_[0] > 0 || node_queue_.best_first_queue_size() > 0)) { bool launched_any_task = false; lower_bound = get_lower_bound(); - abs_gap = upper_bound_ - lower_bound; + abs_gap = std::abs(compute_user_objective(original_lp_, upper_bound_.load()) - + compute_user_objective(original_lp_, lower_bound)); rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); repair_heuristic_solutions(); @@ -1731,14 +1743,16 @@ void branch_and_bound_t::single_threaded_solve() branch_and_bound_worker_t worker(0, original_lp_, Arow_, var_types_, settings_); f_t lower_bound 
= get_lower_bound(); - f_t abs_gap = upper_bound_ - lower_bound; + f_t abs_gap = std::abs(compute_user_objective(original_lp_, upper_bound_.load()) - + compute_user_objective(original_lp_, lower_bound)); f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); while (solver_status_ == mip_status_t::UNSET && abs_gap > settings_.absolute_mip_gap_tol && rel_gap > settings_.relative_mip_gap_tol && node_queue_.best_first_queue_size() > 0) { bool launched_any_task = false; lower_bound = get_lower_bound(); - abs_gap = upper_bound_ - lower_bound; + abs_gap = std::abs(compute_user_objective(original_lp_, upper_bound_.load()) - + compute_user_objective(original_lp_, lower_bound)); rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), lower_bound); repair_heuristic_solutions(); @@ -2317,7 +2331,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut report(' ', obj, root_objective_, 0, num_fractional); f_t rel_gap = user_relative_gap(original_lp_, upper_bound_.load(), root_objective_); - f_t abs_gap = upper_bound_.load() - root_objective_; + f_t abs_gap = std::abs(compute_user_objective(original_lp_, upper_bound_.load()) - + compute_user_objective(original_lp_, root_objective_)); if (rel_gap < settings_.relative_mip_gap_tol || abs_gap < settings_.absolute_mip_gap_tol) { set_solution_at_root(solution, cut_info); set_final_solution(solution, root_objective_); diff --git a/cpp/src/dual_simplex/user_problem.hpp b/cpp/src/dual_simplex/user_problem.hpp index f50a6d33a5..73c4c391be 100644 --- a/cpp/src/dual_simplex/user_problem.hpp +++ b/cpp/src/dual_simplex/user_problem.hpp @@ -46,7 +46,7 @@ struct user_problem_t { std::vector row_names; std::vector col_names; f_t obj_constant; - f_t obj_scale; // 1.0 for min, -1.0 for max + f_t obj_scale; // positive for min, negative for max bool objective_is_integral{false}; std::vector var_types; std::vector Q_offsets; diff --git a/cpp/src/mip_heuristics/CMakeLists.txt 
b/cpp/src/mip_heuristics/CMakeLists.txt index a200d4265b..5193c0dd91 100644 --- a/cpp/src/mip_heuristics/CMakeLists.txt +++ b/cpp/src/mip_heuristics/CMakeLists.txt @@ -18,6 +18,7 @@ set(MIP_LP_NECESSARY_FILES # Files that are MIP-specific and not needed for pure LP set(MIP_NON_LP_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/mip_scaling_strategy.cu ${CMAKE_CURRENT_SOURCE_DIR}/solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/solver.cu ${CMAKE_CURRENT_SOURCE_DIR}/diversity/assignment_hash_map.cu diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index ed165fe610..88b413aefa 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -180,6 +180,7 @@ bool diversity_manager_t::run_presolve(f_t time_limit, timer_t global_ raft::common::nvtx::range fun_scope("run_presolve"); CUOPT_LOG_INFO("Running presolve!"); timer_t presolve_timer(time_limit); + auto term_crit = ls.constraint_prop.bounds_update.solve(*problem_ptr); if (ls.constraint_prop.bounds_update.infeas_constraints_count > 0) { stats.presolve_time = timer.elapsed_time(); @@ -412,23 +413,26 @@ solution_t diversity_manager_t::run_solver() } else if (!fj_only_run) { convert_greater_to_less(*problem_ptr); - f_t tolerance_divisor = - problem_ptr->tolerances.absolute_tolerance / problem_ptr->tolerances.relative_tolerance; - if (tolerance_divisor == 0) { tolerance_divisor = 1; } f_t absolute_tolerance = context.settings.tolerances.absolute_tolerance; + // f_t tolerance_divisor = 100.; pdlp_solver_settings_t pdlp_settings{}; - pdlp_settings.tolerances.relative_primal_tolerance = absolute_tolerance / tolerance_divisor; - pdlp_settings.tolerances.relative_dual_tolerance = absolute_tolerance / tolerance_divisor; - pdlp_settings.time_limit = lp_time_limit; - pdlp_settings.first_primal_feasible = false; - pdlp_settings.concurrent_halt = &global_concurrent_halt; - pdlp_settings.method = method_t::Concurrent; 
- pdlp_settings.inside_mip = true; - pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; - pdlp_settings.num_gpus = context.settings.num_gpus; - pdlp_settings.presolver = presolver_t::None; - + pdlp_settings.tolerances.absolute_dual_tolerance = absolute_tolerance; + pdlp_settings.tolerances.relative_dual_tolerance = + context.settings.tolerances.relative_tolerance; + pdlp_settings.tolerances.absolute_primal_tolerance = absolute_tolerance; + pdlp_settings.tolerances.relative_primal_tolerance = + context.settings.tolerances.relative_tolerance; + pdlp_settings.time_limit = lp_time_limit; + pdlp_settings.first_primal_feasible = false; + pdlp_settings.concurrent_halt = &global_concurrent_halt; + pdlp_settings.method = method_t::Concurrent; + pdlp_settings.inside_mip = true; + pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; + pdlp_settings.num_gpus = context.settings.num_gpus; + pdlp_settings.presolver = presolver_t::None; + pdlp_settings.per_constraint_residual = true; + set_pdlp_solver_mode(pdlp_settings); timer_t lp_timer(lp_time_limit); auto lp_result = solve_lp_with_method(*problem_ptr, pdlp_settings, lp_timer); diff --git a/cpp/src/mip_heuristics/diversity/lns/rins.cu b/cpp/src/mip_heuristics/diversity/lns/rins.cu index 2fbe79ba34..cb0aa82ea0 100644 --- a/cpp/src/mip_heuristics/diversity/lns/rins.cu +++ b/cpp/src/mip_heuristics/diversity/lns/rins.cu @@ -223,8 +223,7 @@ void rins_t::run_rins() std::vector> rins_solution_queue; - mip_solver_context_t fj_context( - &rins_handle, &fixed_problem, context.settings, context.scaling); + mip_solver_context_t fj_context(&rins_handle, &fixed_problem, context.settings); fj_t fj(fj_context); solution_t fj_solution(fixed_problem); fj_solution.copy_new_assignment(cuopt::host_copy(fixed_assignment, rins_handle.get_stream())); diff --git a/cpp/src/mip_heuristics/diversity/population.cu b/cpp/src/mip_heuristics/diversity/population.cu index bca87223d9..bb0fdd6d11 100644 --- 
a/cpp/src/mip_heuristics/diversity/population.cu +++ b/cpp/src/mip_heuristics/diversity/population.cu @@ -265,10 +265,6 @@ void population_t::invoke_get_solution_callback( f_t user_bound = context.stats.get_solution_bound(); solution_t temp_sol(sol); problem_ptr->post_process_assignment(temp_sol.assignment); - if (context.settings.mip_scaling) { - rmm::device_uvector dummy(0, temp_sol.handle_ptr->get_stream()); - context.scaling.unscale_solutions(temp_sol.assignment, dummy); - } if (problem_ptr->has_papilo_presolve_data()) { problem_ptr->papilo_uncrush_assignment(temp_sol.assignment); } @@ -308,10 +304,8 @@ void population_t::run_solution_callbacks(solution_t& sol) invoke_get_solution_callback(sol, get_sol_callback); } } - // save the best objective here, because we might not have been able to return the solution to - // the user because of the unscaling that causes infeasibility. - // This prevents an issue of repaired, or a fully feasible solution being reported in the call - // back in next run. + // Save the best objective here even if callback handling later exits early. + // This prevents older solutions from being reported as "new best" in subsequent callbacks. 
best_feasible_objective = sol.get_objective(); } @@ -344,7 +338,6 @@ void population_t::run_solution_callbacks(solution_t& sol) incumbent_assignment.size(), sol.handle_ptr->get_stream()); - if (context.settings.mip_scaling) { context.scaling.scale_solutions(incumbent_assignment); } bool is_valid = problem_ptr->pre_process_assignment(incumbent_assignment); if (!is_valid) { return; } cuopt_assert(outside_sol.assignment.size() == incumbent_assignment.size(), diff --git a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh index b2f7f80066..e6bb49e6b9 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh @@ -14,6 +14,7 @@ #include #include #include +#include namespace cuopt::linear_programming::detail { diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cu b/cpp/src/mip_heuristics/mip_scaling_strategy.cu new file mode 100644 index 0000000000..8c9b168471 --- /dev/null +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cu @@ -0,0 +1,806 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +constexpr int row_scaling_max_iterations = 8; +constexpr double row_scaling_min_initial_log2_spread = 12.0; +constexpr int row_scaling_factor_exponent = 5; +constexpr int row_scaling_big_m_soft_factor_exponent = 4; +constexpr double row_scaling_min_factor = + 1.0 / static_cast(std::uint64_t{1} << row_scaling_factor_exponent); +constexpr double row_scaling_max_factor = + static_cast(std::uint64_t{1} << row_scaling_factor_exponent); +constexpr double row_scaling_big_m_soft_min_factor = + 1.0 / static_cast(std::uint64_t{1} << row_scaling_big_m_soft_factor_exponent); +constexpr double row_scaling_big_m_soft_max_factor = 1.0; +constexpr double row_scaling_spread_rel_tol = 1.0e-2; +constexpr double integer_coefficient_rel_tol = 1.0e-6; +constexpr double integer_multiplier_rounding_tolerance = 1.0e-6; +constexpr double min_abs_objective_coefficient_threshold = 1.0e-2; +constexpr double max_obj_scaling_coefficient = 1.0e3; + +constexpr int cumulative_row_scaling_exponent = 8; +constexpr double cumulative_row_scaling_min = + 1.0 / static_cast(std::uint64_t{1} << cumulative_row_scaling_exponent); +constexpr double cumulative_row_scaling_max = + static_cast(std::uint64_t{1} << cumulative_row_scaling_exponent); + +constexpr double post_scaling_max_ratio_warn = 1.0e15; + +constexpr double big_m_abs_threshold = 1.0e4; +constexpr double big_m_ratio_threshold = 1.0e4; + +template +struct abs_value_transform_t { + __device__ f_t operator()(f_t value) const { return raft::abs(value); } +}; + +template +struct nonzero_abs_or_inf_transform_t { + __device__ f_t operator()(f_t value) const + { + const f_t abs_value = 
raft::abs(value); + return abs_value > f_t(0) ? abs_value : std::numeric_limits::infinity(); + } +}; + +template +struct nonzero_count_transform_t { + __device__ i_t operator()(f_t value) const { return raft::abs(value) > f_t(0) ? i_t(1) : i_t(0); } +}; + +template +struct max_op_t { + __host__ __device__ item_t operator()(const item_t& lhs, const item_t& rhs) const + { + return lhs > rhs ? lhs : rhs; + } +}; + +template +struct min_op_t { + __host__ __device__ item_t operator()(const item_t& lhs, const item_t& rhs) const + { + return lhs < rhs ? lhs : rhs; + } +}; + +struct gcd_op_t { + __host__ __device__ std::int64_t operator()(std::int64_t lhs, std::int64_t rhs) const + { + lhs = lhs < 0 ? -lhs : lhs; + rhs = rhs < 0 ? -rhs : rhs; + if (lhs == 0) { return rhs; } + if (rhs == 0) { return lhs; } + while (rhs != 0) { + const std::int64_t remainder = lhs % rhs; + lhs = rhs; + rhs = remainder; + } + return lhs; + } +}; + +template +struct integer_coeff_for_integer_var_transform_t { + __device__ std::int64_t operator()(thrust::tuple coeff_with_type) const + { + const f_t coefficient = thrust::get<0>(coeff_with_type); + const var_t var_type = thrust::get<1>(coeff_with_type); + if (var_type != var_t::INTEGER) { return std::int64_t{0}; } + + const f_t abs_coefficient = raft::abs(coefficient); + if (!isfinite(abs_coefficient) || abs_coefficient <= f_t(0)) { return std::int64_t{0}; } + + const f_t rounded_abs_coefficient = round(abs_coefficient); + const f_t tolerance_scale = abs_coefficient > f_t(1) ? 
abs_coefficient : f_t(1); + const f_t integrality_tolerance = + static_cast(integer_coefficient_rel_tol) * tolerance_scale; + if (raft::abs(abs_coefficient - rounded_abs_coefficient) > integrality_tolerance) { + return std::int64_t{0}; + } + if (rounded_abs_coefficient <= f_t(0) || + rounded_abs_coefficient > static_cast(std::numeric_limits::max())) { + return std::int64_t{0}; + } + return static_cast(rounded_abs_coefficient); + } +}; + +template +void compute_row_inf_norm( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_inf_norm, + rmm::cuda_stream_view stream_view) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + auto coeff_abs_iter = + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); + size_t current_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + current_bytes, + coeff_abs_iter, + row_inf_norm.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + max_op_t{}, + f_t(0), + stream_view)); +} + +template +void compute_row_integer_gcd( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_integer_gcd, + rmm::cuda_stream_view stream_view) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_indices = op_problem.get_constraint_matrix_indices(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + const auto& variable_types = op_problem.get_variable_types(); + if (variable_types.size() != static_cast(op_problem.get_n_variables())) { + thrust::fill(op_problem.get_handle_ptr()->get_thrust_policy(), + row_integer_gcd.begin(), + row_integer_gcd.end(), + 
std::int64_t{0}); + return; + } + auto variable_type_per_nnz = + thrust::make_permutation_iterator(variable_types.data(), matrix_indices.data()); + auto coeff_and_type_iter = + thrust::make_zip_iterator(thrust::make_tuple(matrix_values.data(), variable_type_per_nnz)); + auto integer_coeff_iter = thrust::make_transform_iterator( + coeff_and_type_iter, integer_coeff_for_integer_var_transform_t{}); + size_t current_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + current_bytes, + integer_coeff_iter, + row_integer_gcd.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + gcd_op_t{}, + std::int64_t{0}, + stream_view)); +} + +template +void compute_big_m_skip_rows( + const cuopt::linear_programming::optimization_problem_t& op_problem, + rmm::device_uvector& temp_storage, + size_t temp_storage_bytes, + rmm::device_uvector& row_inf_norm, + rmm::device_uvector& row_min_nonzero, + rmm::device_uvector& row_nonzero_count, + rmm::device_uvector& row_skip_scaling) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + const auto stream_view = op_problem.get_handle_ptr()->get_stream(); + auto coeff_abs_iter = + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); + auto coeff_nonzero_min_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_abs_or_inf_transform_t{}); + auto coeff_nonzero_count_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_count_transform_t{}); + + size_t max_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + max_bytes, + coeff_abs_iter, + row_inf_norm.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + max_op_t{}, + f_t(0), + stream_view)); + size_t min_bytes = temp_storage_bytes; + 
RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + min_bytes, + coeff_nonzero_min_iter, + row_min_nonzero.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + min_op_t{}, + std::numeric_limits::infinity(), + stream_view)); + size_t count_bytes = temp_storage_bytes; + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(temp_storage.data(), + count_bytes, + coeff_nonzero_count_iter, + row_nonzero_count.data(), + op_problem.get_n_constraints(), + matrix_offsets.data(), + matrix_offsets.data() + 1, + thrust::plus{}, + i_t(0), + stream_view)); + + auto row_begin = thrust::make_zip_iterator( + thrust::make_tuple(row_inf_norm.begin(), row_min_nonzero.begin(), row_nonzero_count.begin())); + auto row_end = thrust::make_zip_iterator( + thrust::make_tuple(row_inf_norm.end(), row_min_nonzero.end(), row_nonzero_count.end())); + thrust::transform( + op_problem.get_handle_ptr()->get_thrust_policy(), + row_begin, + row_end, + row_skip_scaling.begin(), + [] __device__(auto row_info) -> i_t { + const f_t row_norm = thrust::get<0>(row_info); + const f_t row_min_non_zero = thrust::get<1>(row_info); + const i_t row_non_zero_size = thrust::get<2>(row_info); + if (row_non_zero_size < i_t(2) || row_min_non_zero >= std::numeric_limits::infinity()) { + return i_t(0); + } + + const f_t row_ratio = row_norm / row_min_non_zero; + return row_norm >= static_cast(big_m_abs_threshold) && + row_ratio >= static_cast(big_m_ratio_threshold) + ? 
i_t(1) + : i_t(0); + }); +} + +template +void scale_objective(cuopt::linear_programming::optimization_problem_t& op_problem) +{ + auto& obj_coefficients = op_problem.get_objective_coefficients(); + const i_t n_cols = op_problem.get_n_variables(); + if (n_cols == 0) { return; } + + const auto* handle_ptr = op_problem.get_handle_ptr(); + + f_t min_abs_obj = thrust::transform_reduce(handle_ptr->get_thrust_policy(), + obj_coefficients.begin(), + obj_coefficients.end(), + nonzero_abs_or_inf_transform_t{}, + std::numeric_limits::infinity(), + min_op_t{}); + + f_t max_abs_obj = thrust::transform_reduce(handle_ptr->get_thrust_policy(), + obj_coefficients.begin(), + obj_coefficients.end(), + abs_value_transform_t{}, + f_t(0), + max_op_t{}); + + if (!std::isfinite(static_cast(min_abs_obj)) || min_abs_obj <= f_t(0) || + max_abs_obj <= f_t(0)) { + CUOPT_LOG_INFO("MIP_OBJ_SCALING skipped: no finite nonzero objective coefficients"); + return; + } + + if (static_cast(min_abs_obj) >= min_abs_objective_coefficient_threshold) { + CUOPT_LOG_INFO("MIP_OBJ_SCALING skipped: min_abs_coeff=%g already above threshold=%g", + static_cast(min_abs_obj), + min_abs_objective_coefficient_threshold); + return; + } + + double raw_scale = min_abs_objective_coefficient_threshold / static_cast(min_abs_obj); + double scale = std::min(raw_scale, max_obj_scaling_coefficient); + + double post_max = static_cast(max_abs_obj) * scale; + if (post_max > 1.0e6) { + CUOPT_LOG_INFO("MIP_OBJ_SCALING skipped: would push max_coeff from %g to %g (limit 1e6)", + static_cast(max_abs_obj), + post_max); + return; + } + + f_t scale_f = static_cast(scale); + thrust::transform(handle_ptr->get_thrust_policy(), + obj_coefficients.begin(), + obj_coefficients.end(), + obj_coefficients.begin(), + [scale_f] __device__(f_t c) -> f_t { return c * scale_f; }); + + f_t old_sf = op_problem.get_objective_scaling_factor(); + f_t old_off = op_problem.get_objective_offset(); + op_problem.set_objective_scaling_factor(old_sf / scale_f); + 
op_problem.set_objective_offset(old_off * scale_f); + + CUOPT_LOG_INFO( + "MIP_OBJ_SCALING applied: min_abs_coeff=%g max_abs_coeff=%g scale=%g new_scaling_factor=%g", + static_cast(min_abs_obj), + static_cast(max_abs_obj), + scale, + static_cast(old_sf / scale_f)); +} + +template +mip_scaling_strategy_t::mip_scaling_strategy_t( + typename mip_scaling_strategy_t::optimization_problem_type_t& op_problem_scaled) + : handle_ptr_(op_problem_scaled.get_handle_ptr()), + stream_view_(handle_ptr_->get_stream()), + op_problem_scaled_(op_problem_scaled) +{ +} + +template +size_t dry_run_cub(const cuopt::linear_programming::optimization_problem_t& op_problem, + i_t n_rows, + rmm::device_uvector& row_inf_norm, + rmm::device_uvector& row_min_nonzero, + rmm::device_uvector& row_nonzero_count, + rmm::device_uvector& row_integer_gcd, + rmm::cuda_stream_view stream_view) +{ + const auto& matrix_values = op_problem.get_constraint_matrix_values(); + const auto& matrix_indices = op_problem.get_constraint_matrix_indices(); + const auto& matrix_offsets = op_problem.get_constraint_matrix_offsets(); + const auto& variable_types = op_problem.get_variable_types(); + size_t temp_storage_bytes = 0; + size_t current_required_bytes = 0; + + auto coeff_abs_iter = + thrust::make_transform_iterator(matrix_values.data(), abs_value_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_abs_iter, + row_inf_norm.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + max_op_t{}, + f_t(0), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + auto coeff_nonzero_min_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_abs_or_inf_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_nonzero_min_iter, + row_min_nonzero.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + min_op_t{}, + 
std::numeric_limits::infinity(), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + auto coeff_nonzero_count_iter = + thrust::make_transform_iterator(matrix_values.data(), nonzero_count_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + coeff_nonzero_count_iter, + row_nonzero_count.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + thrust::plus{}, + i_t(0), + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + + if (variable_types.size() == static_cast(op_problem.get_n_variables())) { + auto variable_type_per_nnz = + thrust::make_permutation_iterator(variable_types.data(), matrix_indices.data()); + auto coeff_and_type_iter = + thrust::make_zip_iterator(thrust::make_tuple(matrix_values.data(), variable_type_per_nnz)); + auto integer_coeff_iter = thrust::make_transform_iterator( + coeff_and_type_iter, integer_coeff_for_integer_var_transform_t{}); + RAFT_CUDA_TRY(cub::DeviceSegmentedReduce::Reduce(nullptr, + current_required_bytes, + integer_coeff_iter, + row_integer_gcd.data(), + n_rows, + matrix_offsets.data(), + matrix_offsets.data() + 1, + gcd_op_t{}, + std::int64_t{0}, + stream_view)); + temp_storage_bytes = std::max(temp_storage_bytes, current_required_bytes); + } + + return temp_storage_bytes; +} + +template +void mip_scaling_strategy_t::scale_problem() +{ + raft::common::nvtx::range fun_scope("mip_scale_problem"); + + auto& matrix_values = op_problem_scaled_.get_constraint_matrix_values(); + auto& matrix_offsets = op_problem_scaled_.get_constraint_matrix_offsets(); + auto& constraint_bounds = op_problem_scaled_.get_constraint_bounds(); + auto& constraint_lower_bounds = op_problem_scaled_.get_constraint_lower_bounds(); + auto& constraint_upper_bounds = op_problem_scaled_.get_constraint_upper_bounds(); + const i_t n_rows = op_problem_scaled_.get_n_constraints(); + const i_t n_cols = 
op_problem_scaled_.get_n_variables(); + const i_t nnz = op_problem_scaled_.get_nnz(); + + scale_objective(op_problem_scaled_); + + if (n_rows == 0 || nnz <= 0) { return; } + cuopt_assert(constraint_lower_bounds.size() == static_cast(n_rows), + "constraint_lower_bounds must be provided for row scaling"); + cuopt_assert(constraint_upper_bounds.size() == static_cast(n_rows), + "constraint_upper_bounds must be provided for row scaling"); + cuopt_assert(constraint_bounds.size() == size_t{0} || + constraint_bounds.size() == static_cast(n_rows), + "constraint_bounds must be empty or have one value per constraint"); + + rmm::device_uvector row_inf_norm(static_cast(n_rows), stream_view_); + rmm::device_uvector row_min_nonzero(static_cast(n_rows), stream_view_); + rmm::device_uvector row_nonzero_count(static_cast(n_rows), stream_view_); + rmm::device_uvector row_integer_gcd(static_cast(n_rows), stream_view_); + rmm::device_uvector row_rhs_magnitude(static_cast(n_rows), stream_view_); + rmm::device_uvector row_skip_scaling(static_cast(n_rows), stream_view_); + thrust::fill( + handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(0)); + rmm::device_uvector iteration_scaling(static_cast(n_rows), stream_view_); + rmm::device_uvector cumulative_scaling(static_cast(n_rows), stream_view_); + thrust::fill( + handle_ptr_->get_thrust_policy(), cumulative_scaling.begin(), cumulative_scaling.end(), f_t(1)); + rmm::device_uvector coefficient_row_index(static_cast(nnz), stream_view_); + rmm::device_uvector ref_log2_values(static_cast(n_rows), stream_view_); + + thrust::upper_bound(handle_ptr_->get_thrust_policy(), + matrix_offsets.begin(), + matrix_offsets.end(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(nnz), + coefficient_row_index.begin()); + thrust::transform( + handle_ptr_->get_thrust_policy(), + coefficient_row_index.begin(), + coefficient_row_index.end(), + coefficient_row_index.begin(), + [] __device__(i_t 
row_upper_bound_idx) -> i_t { return row_upper_bound_idx - 1; }); + + size_t temp_storage_bytes = dry_run_cub(op_problem_scaled_, + n_rows, + row_inf_norm, + row_min_nonzero, + row_nonzero_count, + row_integer_gcd, + stream_view_); + + rmm::device_uvector temp_storage(temp_storage_bytes, stream_view_); + compute_big_m_skip_rows(op_problem_scaled_, + temp_storage, + temp_storage_bytes, + row_inf_norm, + row_min_nonzero, + row_nonzero_count, + row_skip_scaling); + + i_t big_m_rows = thrust::count( + handle_ptr_->get_thrust_policy(), row_skip_scaling.begin(), row_skip_scaling.end(), i_t(1)); + + CUOPT_LOG_INFO("MIP row scaling start: rows=%d cols=%d max_iterations=%d soft_big_m_rows=%d", + n_rows, + n_cols, + row_scaling_max_iterations, + big_m_rows); + + double previous_row_log2_spread = std::numeric_limits::infinity(); + for (int iteration = 0; iteration < row_scaling_max_iterations; ++iteration) { + compute_row_inf_norm( + op_problem_scaled_, temp_storage, temp_storage_bytes, row_inf_norm, stream_view_); + compute_row_integer_gcd( + op_problem_scaled_, temp_storage, temp_storage_bytes, row_integer_gcd, stream_view_); + + using row_stats_t = thrust::tuple; + auto row_norm_log2_stats = thrust::transform_reduce( + handle_ptr_->get_thrust_policy(), + row_inf_norm.begin(), + row_inf_norm.end(), + [] __device__(f_t row_norm) -> row_stats_t { + if (row_norm == f_t(0)) { + return {0.0, + 0.0, + std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}; + } + const double row_log2 = log2(static_cast(row_norm)); + return {row_log2, 1.0, row_log2, row_log2}; + }, + row_stats_t{0.0, + 0.0, + std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}, + [] __device__(row_stats_t a, row_stats_t b) -> row_stats_t { + return {thrust::get<0>(a) + thrust::get<0>(b), + thrust::get<1>(a) + thrust::get<1>(b), + min_op_t{}(thrust::get<2>(a), thrust::get<2>(b)), + max_op_t{}(thrust::get<3>(a), thrust::get<3>(b))}; + }); + const i_t active_row_count = 
static_cast(thrust::get<1>(row_norm_log2_stats)); + if (active_row_count == 0) { break; } + const double row_log2_spread = + thrust::get<3>(row_norm_log2_stats) - thrust::get<2>(row_norm_log2_stats); + if (iteration == 0 && row_log2_spread <= row_scaling_min_initial_log2_spread) { + CUOPT_LOG_INFO("MIP row scaling skipped: initial_log2_spread=%g threshold=%g", + row_log2_spread, + row_scaling_min_initial_log2_spread); + break; + } + if (std::isfinite(previous_row_log2_spread)) { + const double spread_improvement = previous_row_log2_spread - row_log2_spread; + if (spread_improvement <= + row_scaling_spread_rel_tol * std::max(1.0, previous_row_log2_spread)) { + break; + } + } + previous_row_log2_spread = row_log2_spread; + + thrust::transform(handle_ptr_->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple( + constraint_lower_bounds.begin(), constraint_upper_bounds.begin())), + thrust::make_zip_iterator(thrust::make_tuple(constraint_lower_bounds.end(), + constraint_upper_bounds.end())), + row_rhs_magnitude.begin(), + [] __device__(auto row_bounds) -> f_t { + const f_t lower_bound = thrust::get<0>(row_bounds); + const f_t upper_bound = thrust::get<1>(row_bounds); + f_t rhs_norm = f_t(0); + if (isfinite(lower_bound)) { rhs_norm = raft::abs(lower_bound); } + if (isfinite(upper_bound)) { + const f_t upper_abs = raft::abs(upper_bound); + rhs_norm = upper_abs > rhs_norm ? 
upper_abs : rhs_norm; + } + return rhs_norm; + }); + + // Fix A+C: median-based target norm excluding big-M rows (robust to outliers) + constexpr double neg_inf_sentinel = -1.0e300; + thrust::transform(handle_ptr_->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple( + row_inf_norm.begin(), row_rhs_magnitude.begin(), row_skip_scaling.begin())), + thrust::make_zip_iterator(thrust::make_tuple( + row_inf_norm.end(), row_rhs_magnitude.end(), row_skip_scaling.end())), + ref_log2_values.begin(), + [neg_inf_sentinel] __device__(auto row_info) -> double { + const f_t row_norm = thrust::get<0>(row_info); + const f_t rhs_norm = thrust::get<1>(row_info); + const i_t is_big_m = thrust::get<2>(row_info); + if (is_big_m) { return neg_inf_sentinel - 1.0; } + const f_t reference_norm = row_norm > rhs_norm ? row_norm : rhs_norm; + if (reference_norm <= f_t(0)) { return neg_inf_sentinel - 1.0; } + return log2(static_cast(reference_norm)); + }); + thrust::sort(handle_ptr_->get_thrust_policy(), ref_log2_values.begin(), ref_log2_values.end()); + auto valid_begin_iter = thrust::lower_bound(handle_ptr_->get_thrust_policy(), + ref_log2_values.begin(), + ref_log2_values.end(), + neg_inf_sentinel); + i_t n_invalid = static_cast(valid_begin_iter - ref_log2_values.begin()); + i_t valid_count = n_rows - n_invalid; + if (valid_count == 0) { break; } + i_t median_idx = n_invalid + valid_count / 2; + double h_median_log2; + RAFT_CUDA_TRY(cudaMemcpyAsync(&h_median_log2, + ref_log2_values.data() + median_idx, + sizeof(double), + cudaMemcpyDeviceToHost, + stream_view_)); + handle_ptr_->sync_stream(); + f_t target_norm = static_cast(exp2(h_median_log2)); + cuopt_assert(std::isfinite(static_cast(target_norm)), "target_norm must be finite"); + cuopt_assert(target_norm > f_t(0), "target_norm must be positive"); + + thrust::transform( + handle_ptr_->get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.begin(), + row_skip_scaling.begin(), + 
row_integer_gcd.begin(), + cumulative_scaling.begin())), + thrust::make_zip_iterator(thrust::make_tuple(row_inf_norm.end(), + row_skip_scaling.end(), + row_integer_gcd.end(), + cumulative_scaling.end())), + iteration_scaling.begin(), + [target_norm] __device__(auto row_info) -> f_t { + const f_t row_norm = thrust::get<0>(row_info); + const i_t is_big_m = thrust::get<1>(row_info); + const std::int64_t row_coeff_gcd = thrust::get<2>(row_info); + const f_t cum_scale = thrust::get<3>(row_info); + if (row_norm == f_t(0)) { return f_t(1); } + + const f_t desired_scaling = target_norm / row_norm; + if (!isfinite(desired_scaling) || desired_scaling <= f_t(0)) { return f_t(1); } + + f_t min_scaling = is_big_m ? static_cast(row_scaling_big_m_soft_min_factor) + : static_cast(row_scaling_min_factor); + f_t max_scaling = is_big_m ? static_cast(row_scaling_big_m_soft_max_factor) + : static_cast(row_scaling_max_factor); + + // Fix B: prevent scaling UP rows that are already numerically large + if (!is_big_m && row_norm >= static_cast(big_m_abs_threshold)) { + if (max_scaling > f_t(1)) { max_scaling = f_t(1); } + } + + const f_t cum_lower = static_cast(cumulative_row_scaling_min) / cum_scale; + const f_t cum_upper = static_cast(cumulative_row_scaling_max) / cum_scale; + if (cum_lower > min_scaling) { min_scaling = cum_lower; } + if (cum_upper < max_scaling) { max_scaling = cum_upper; } + if (min_scaling > max_scaling) { return f_t(1); } + + f_t row_scaling = desired_scaling; + if (row_scaling < min_scaling) { row_scaling = min_scaling; } + if (row_scaling > max_scaling) { row_scaling = max_scaling; } + + // Fix E: prefer power-of-two scaling for integer rows (exact in IEEE 754) + if (row_coeff_gcd > std::int64_t{0}) { + const f_t gcd_value = static_cast(row_coeff_gcd); + if (isfinite(gcd_value) && gcd_value > f_t(0)) { + const double log2_scaling = log2(static_cast(row_scaling)); + int k_candidates[3] = {static_cast(round(log2_scaling)), + static_cast(floor(log2_scaling)), + 
static_cast(ceil(log2_scaling))}; + bool found_pow2 = false; + for (int ci = 0; ci < 3 && !found_pow2; ++ci) { + int k = k_candidates[ci]; + f_t pow2 = static_cast(exp2(static_cast(k))); + if (pow2 < min_scaling || pow2 > max_scaling) { continue; } + bool preserves = + (k >= 0) || (-k < 63 && (row_coeff_gcd % (std::int64_t{1} << (-k))) == 0); + if (preserves) { + row_scaling = pow2; + found_pow2 = true; + } + } + if (!found_pow2) { + std::int64_t min_mult = static_cast( + ceil(static_cast(min_scaling * gcd_value - + static_cast(integer_multiplier_rounding_tolerance)))); + std::int64_t max_mult = static_cast(floor( + static_cast(max_scaling * gcd_value + + static_cast(integer_multiplier_rounding_tolerance)))); + if (min_mult < std::int64_t{1}) { min_mult = std::int64_t{1}; } + if (max_mult < min_mult) { max_mult = min_mult; } + std::int64_t proj_mult = static_cast(round(row_scaling * gcd_value)); + if (proj_mult < min_mult) { proj_mult = min_mult; } + if (proj_mult > max_mult) { proj_mult = max_mult; } + row_scaling = static_cast(proj_mult) / gcd_value; + } + } + } + return row_scaling; + }); + + i_t scaled_rows = + thrust::count_if(handle_ptr_->get_thrust_policy(), + iteration_scaling.begin(), + iteration_scaling.end(), + [] __device__(f_t row_scale) -> bool { return row_scale != f_t(1); }); + CUOPT_LOG_INFO( + "MIP_SCALING_METRICS iteration=%d log2_spread=%g target_norm=%g scaled_rows=%d " + "valid_rows=%d", + iteration, + row_log2_spread, + static_cast(target_norm), + scaled_rows, + valid_count); + if (scaled_rows == 0) { break; } + + thrust::transform( + handle_ptr_->get_thrust_policy(), + matrix_values.begin(), + matrix_values.end(), + thrust::make_permutation_iterator(iteration_scaling.begin(), coefficient_row_index.begin()), + matrix_values.begin(), + thrust::multiplies{}); + + thrust::transform(handle_ptr_->get_thrust_policy(), + cumulative_scaling.begin(), + cumulative_scaling.end(), + iteration_scaling.begin(), + cumulative_scaling.begin(), + 
thrust::multiplies{}); + + thrust::transform(handle_ptr_->get_thrust_policy(), + constraint_lower_bounds.begin(), + constraint_lower_bounds.end(), + iteration_scaling.begin(), + constraint_lower_bounds.begin(), + thrust::multiplies{}); + thrust::transform(handle_ptr_->get_thrust_policy(), + constraint_upper_bounds.begin(), + constraint_upper_bounds.end(), + iteration_scaling.begin(), + constraint_upper_bounds.begin(), + thrust::multiplies{}); + if (constraint_bounds.size() == static_cast(n_rows)) { + thrust::transform(handle_ptr_->get_thrust_policy(), + constraint_bounds.begin(), + constraint_bounds.end(), + iteration_scaling.begin(), + constraint_bounds.begin(), + thrust::multiplies{}); + } + } + + CUOPT_LOG_INFO("MIP_SCALING_SUMMARY rows=%d bigm_rows=%d final_spread=%g", + n_rows, + big_m_rows, + previous_row_log2_spread); + + const f_t post_max_coeff = thrust::transform_reduce(handle_ptr_->get_thrust_policy(), + matrix_values.begin(), + matrix_values.end(), + abs_value_transform_t{}, + f_t(0), + max_op_t{}); + const f_t post_min_nonzero_coeff = thrust::transform_reduce(handle_ptr_->get_thrust_policy(), + matrix_values.begin(), + matrix_values.end(), + nonzero_abs_or_inf_transform_t{}, + std::numeric_limits::infinity(), + min_op_t{}); + if (std::isfinite(static_cast(post_max_coeff)) && + std::isfinite(static_cast(post_min_nonzero_coeff)) && + post_min_nonzero_coeff > f_t(0)) { + const double post_ratio = + static_cast(post_max_coeff) / static_cast(post_min_nonzero_coeff); + if (post_ratio > post_scaling_max_ratio_warn) { + CUOPT_LOG_WARN( + "MIP row scaling: extreme coefficient ratio after scaling: max=%g min_nz=%g ratio=%g", + static_cast(post_max_coeff), + static_cast(post_min_nonzero_coeff), + post_ratio); + } + } + + CUOPT_LOG_INFO("MIP row scaling completed"); + op_problem_scaled_.print_scaling_information(); +} + +#define INSTANTIATE(F_TYPE) template class mip_scaling_strategy_t; + +#if MIP_INSTANTIATE_FLOAT +INSTANTIATE(float) +#endif + +#if 
MIP_INSTANTIATE_DOUBLE +INSTANTIATE(double) +#endif + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/mip_scaling_strategy.cuh b/cpp/src/mip_heuristics/mip_scaling_strategy.cuh new file mode 100644 index 0000000000..0fc83b8cb9 --- /dev/null +++ b/cpp/src/mip_heuristics/mip_scaling_strategy.cuh @@ -0,0 +1,32 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include + +#include + +#include + +namespace cuopt::linear_programming::detail { + +template +class mip_scaling_strategy_t { + public: + using optimization_problem_type_t = cuopt::linear_programming::optimization_problem_t; + explicit mip_scaling_strategy_t(optimization_problem_type_t& op_problem_scaled); + + void scale_problem(); + + private: + raft::handle_t const* handle_ptr_{nullptr}; + rmm::cuda_stream_view stream_view_; + optimization_problem_type_t& op_problem_scaled_; +}; + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp index 20a586f6fb..85e495608f 100644 --- a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp +++ b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp @@ -669,6 +669,10 @@ std::optional> third_party_presolve_t( papilo_problem, op_problem.get_handle_ptr(), category, maximize_); + // metadata from original optimization problem that is not filled + opt_problem.set_problem_name(op_problem.get_problem_name()); + opt_problem.set_objective_scaling_factor(op_problem.get_objective_scaling_factor()); + // when an objective offset outside (e.g. 
from mps file), handle accordingly auto col_flags = papilo_problem.getColFlags(); std::vector implied_integer_indices; for (size_t i = 0; i < col_flags.size(); i++) { diff --git a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu index e2bbc8feb1..84415f5372 100644 --- a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu +++ b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu @@ -8,7 +8,6 @@ #include "relaxed_lp.cuh" #include -#include #include #include #include diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index f5a2172f2e..8733ecfbab 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -9,13 +9,13 @@ #include #include +#include #include #include #include #include #include -#include #include #include #include @@ -64,12 +64,7 @@ mip_solution_t run_mip(detail::problem_t& problem, timer_t& timer) { raft::common::nvtx::range fun_scope("run_mip"); - auto constexpr const running_mip = true; - // TODO ask Akif and Alice how was this passed down? 
- auto hyper_params = settings.hyper_params; - hyper_params.update_primal_weight_on_initial_solution = false; - hyper_params.update_step_size_on_initial_solution = true; if (settings.get_mip_callbacks().size() > 0) { auto callback_num_variables = problem.original_problem_ptr->get_n_variables(); if (problem.has_papilo_presolve_data()) { @@ -133,33 +128,12 @@ mip_solution_t run_mip(detail::problem_t& problem, "Size mismatch"); cuopt_assert(problem.original_problem_ptr->get_n_constraints() == scaled_problem.n_constraints, "Size mismatch"); - detail::pdlp_initial_scaling_strategy_t scaling( - scaled_problem.handle_ptr, - scaled_problem, - hyper_params.default_l_inf_ruiz_iterations, - (f_t)hyper_params.default_alpha_pock_chambolle_rescaling, - scaled_problem.reverse_coefficients, - scaled_problem.reverse_offsets, - scaled_problem.reverse_constraints, - nullptr, - hyper_params, - running_mip); - - cuopt_func_call(auto saved_problem = scaled_problem); - if (settings.mip_scaling) { - scaling.scale_problem(); - if (settings.initial_solutions.size() > 0) { - for (const auto& initial_solution : settings.initial_solutions) { - scaling.scale_primal(*initial_solution); - } - } - } + // only call preprocess on scaled problem, so we can compute feasibility on the original problem scaled_problem.preprocess_problem(); - // cuopt_func_call((check_scaled_problem(scaled_problem, saved_problem))); detail::trivial_presolve(scaled_problem); - detail::mip_solver_t solver(scaled_problem, settings, scaling, timer); + detail::mip_solver_t solver(scaled_problem, settings, timer); if (timer.check_time_limit()) { CUOPT_LOG_INFO("Time limit reached before main solve"); detail::solution_t sol(problem); @@ -170,17 +144,16 @@ mip_solution_t run_mip(detail::problem_t& problem, auto scaled_sol = solver.run_solver(); bool is_feasible_before_scaling = scaled_sol.get_feasible(); scaled_sol.problem_ptr = &problem; - if (settings.mip_scaling) { scaling.unscale_solutions(scaled_sol); } // at this point we 
need to compute the feasibility on the original problem not the presolved one bool is_feasible_after_unscaling = scaled_sol.compute_feasibility(); if (!scaled_problem.empty && is_feasible_before_scaling != is_feasible_after_unscaling) { CUOPT_LOG_WARN( - "The feasibility does not match on scaled and unscaled problems. To overcome this issue, " + "The feasibility does not match on scaled and original problems. To overcome this issue, " "please provide a more numerically stable problem."); } auto sol = scaled_sol.get_solution( - is_feasible_before_scaling || is_feasible_after_unscaling, solver.get_solver_stats(), false); + is_feasible_after_unscaling || is_feasible_before_scaling, solver.get_solver_stats(), false); int hidesol = std::getenv("CUOPT_MIP_HIDE_SOLUTION") ? atoi(std::getenv("CUOPT_MIP_HIDE_SOLUTION")) : 0; @@ -241,7 +214,10 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, } auto timer = timer_t(time_limit); - + if (settings.mip_scaling) { + detail::mip_scaling_strategy_t scaling(op_problem); + scaling.scale_problem(); + } double presolve_time = 0.0; std::unique_ptr> presolver; std::optional> presolve_result; diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index e6f6d50b62..387c2926af 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -41,14 +41,10 @@ static void init_handler(const raft::handle_t* handle_ptr) template mip_solver_t::mip_solver_t(const problem_t& op_problem, const mip_solver_settings_t& solver_settings, - pdlp_initial_scaling_strategy_t& scaling, timer_t timer) : op_problem_(op_problem), solver_settings_(solver_settings), - context(op_problem.handle_ptr, - const_cast*>(&op_problem), - solver_settings, - scaling), + context(op_problem.handle_ptr, const_cast*>(&op_problem), solver_settings), timer_(timer) { init_handler(op_problem.handle_ptr); diff --git a/cpp/src/mip_heuristics/solver.cuh b/cpp/src/mip_heuristics/solver.cuh index 1b5fe17244..9b9024a1dc 100644 
--- a/cpp/src/mip_heuristics/solver.cuh +++ b/cpp/src/mip_heuristics/solver.cuh @@ -20,7 +20,6 @@ class mip_solver_t { public: explicit mip_solver_t(const problem_t& op_problem, const mip_solver_settings_t& solver_settings, - pdlp_initial_scaling_strategy_t& scaling, timer_t timer); solution_t run_solver(); diff --git a/cpp/src/mip_heuristics/solver_context.cuh b/cpp/src/mip_heuristics/solver_context.cuh index baac1dd9d6..d68a19bb61 100644 --- a/cpp/src/mip_heuristics/solver_context.cuh +++ b/cpp/src/mip_heuristics/solver_context.cuh @@ -9,7 +9,6 @@ #include #include -#include #include #include @@ -34,9 +33,8 @@ template struct mip_solver_context_t { explicit mip_solver_context_t(raft::handle_t const* handle_ptr_, problem_t* problem_ptr_, - mip_solver_settings_t settings_, - pdlp_initial_scaling_strategy_t& scaling) - : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_), scaling(scaling) + mip_solver_settings_t settings_) + : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_) { cuopt_assert(problem_ptr != nullptr, "problem_ptr is nullptr"); stats.set_solution_bound(problem_ptr->maximize ? 
std::numeric_limits::infinity() @@ -53,7 +51,6 @@ struct mip_solver_context_t { diversity_manager_t* diversity_manager_ptr{nullptr}; std::atomic preempt_heuristic_solver_ = false; const mip_solver_settings_t settings; - pdlp_initial_scaling_strategy_t& scaling; solver_stats_t stats; // Work limit context for tracking work units in deterministic mode (shared across all timers in // GPU heuristic loop) diff --git a/cpp/tests/mip/feasibility_jump_tests.cu b/cpp/tests/mip/feasibility_jump_tests.cu index baa3e9b803..4e8a518522 100644 --- a/cpp/tests/mip/feasibility_jump_tests.cu +++ b/cpp/tests/mip/feasibility_jump_tests.cu @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -77,16 +78,7 @@ static fj_state_t run_fj(std::string test_instance, // run the problem constructor of MIP, so that we do bounds standardization detail::problem_t problem(op_problem); problem.preprocess_problem(); - detail::pdhg_solver_t pdhg_solver(problem.handle_ptr, problem); - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 10, - 1.0, - pdhg_solver, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - true); + detail::mip_scaling_strategy_t scaling(problem); auto settings = mip_solver_settings_t{}; settings.time_limit = 30.; diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu index 5e2f08007d..1f825a26f7 100644 --- a/cpp/tests/mip/load_balancing_test.cu +++ b/cpp/tests/mip/load_balancing_test.cu @@ -9,11 +9,11 @@ #include "mip_utils.cuh" #include +#include #include #include #include #include -#include #include #include #include @@ -128,16 +128,7 @@ void test_multi_probe(std::string path) problem_checking_t::check_problem_representation(op_problem); detail::problem_t problem(op_problem); mip_solver_settings_t default_settings{}; - detail::pdhg_solver_t pdhg_solver(problem.handle_ptr, problem); - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 
10, - 1.0, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - nullptr, - true); + detail::mip_scaling_strategy_t scaling(problem); detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); detail::load_balanced_problem_t lb_problem(problem); detail::load_balanced_bounds_presolve_t lb_prs(lb_problem, solver.context); diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index 073c153486..003220de9b 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -9,10 +9,10 @@ #include "mip_utils.cuh" #include +#include #include #include #include -#include #include #include #include @@ -150,18 +150,7 @@ void test_multi_probe(std::string path) problem_checking_t::check_problem_representation(op_problem); detail::problem_t problem(op_problem); mip_solver_settings_t default_settings{}; - pdlp_hyper_params::pdlp_hyper_params_t hyper_params{}; - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 10, - 1.0, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - nullptr, - hyper_params, - true); - detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); + detail::mip_solver_t solver(problem, default_settings, cuopt::timer_t(0)); detail::bound_presolve_t bnd_prb_0(solver.context); detail::bound_presolve_t bnd_prb_1(solver.context); detail::multi_probe_t multi_probe_prs(solver.context); diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py index 59ea62089d..8412c745b5 100644 --- a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py +++ b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py @@ -450,11 +450,6 @@ class SolverConfig(BaseModel): description="Set True to run heuristics only, False to run " "heuristics 
and branch and bound for MILP", ) - mip_batch_pdlp_strong_branching: Optional[int] = Field( - default=0, - description="Set 1 to enable batch PDLP strong branching " - "in the MIP solver, 0 to disable.", - ) num_cpu_threads: Optional[int] = Field( default=None, description="Set the number of CPU threads to use for branch and bound.", # noqa