diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 988c9c50a..16f503e89 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -331,6 +331,7 @@ void compute_dual_solution_from_basis(const lp_problem_t& lp, template i_t dual_push(const lp_problem_t& lp, + const csr_matrix_t& Arow, const simplex_solver_settings_t& settings, f_t start_time, lp_solution_t& solution, @@ -401,11 +402,9 @@ i_t dual_push(const lp_problem_t& lp, es_sparse.x[0] = -delta_zs; // B^T delta_y = -delta_zs*es - std::vector delta_y(m); sparse_vector_t delta_y_sparse(m, 1); sparse_vector_t UTsol_sparse(m, 1); ft.b_transpose_solve(es_sparse, delta_y_sparse, UTsol_sparse); - delta_y_sparse.scatter(delta_y); // We solved B^T delta_y = -delta_zs*es, but for the update we need // U^T*etilde = es. @@ -417,15 +416,23 @@ i_t dual_push(const lp_problem_t& lp, // delta_zN = -N^T delta_y std::vector delta_zN(n - m); - for (i_t k = 0; k < n - m; ++k) { - const i_t j = nonbasic_list[k]; - const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; - f_t dot = 0.0; - for (i_t p = col_start; p < col_end; ++p) { - dot += lp.A.x[p] * delta_y[lp.A.i[p]]; + std::vector delta_expanded(n, 0.); + + // Iterate directly over sparse delta_y instead of checking zeros + for (i_t nnz_idx = 0; nnz_idx < delta_y_sparse.i.size(); ++nnz_idx) { + const i_t row = delta_y_sparse.i[nnz_idx]; + const f_t val = delta_y_sparse.x[nnz_idx]; + + // Accumulate contributions from this row to all columns + const i_t row_start = Arow.row_start[row]; + const i_t row_end = Arow.row_start[row + 1]; + for (i_t p = row_start; p < row_end; ++p) { + const i_t col = Arow.j[p]; + delta_expanded[col] += Arow.x[p] * val; } - delta_zN[k] = -dot; + } + for (i_t k = 0; k < n - m; ++k) { + delta_zN[k] = -delta_expanded[nonbasic_list[k]]; } i_t entering_index = -1; @@ -435,8 +442,10 @@ i_t dual_push(const lp_problem_t& lp, assert(step_length >= -1e-6); // y <- y + step_length * delta_y - for (i_t i = 0; i < m; ++i) { - y[i] += step_length * delta_y[i]; + // Optimized: Only update non-zero elements from sparse representation + for (i_t nnz_idx = 0; nnz_idx < delta_y_sparse.i.size(); ++nnz_idx) { + const i_t i = delta_y_sparse.i[nnz_idx]; + y[i] += step_length * delta_y_sparse.x[nnz_idx]; } // z <- z + step_length * delta z @@ -725,7 +734,6 @@ i_t primal_push(const lp_problem_t& lp, { const i_t m = lp.num_rows; const i_t n = lp.num_cols; - settings.log.debug("Primal push: superbasic %ld\n", superbasic_list.size()); std::vector& x = solution.x; @@ -1002,6 +1010,7 @@ i_t primal_push(const lp_problem_t& lp, } solution.x = x_compare; solution.iterations += num_pushes; + return 0; } @@ -1190,6 +1199,9 @@ crossover_status_t crossover(const lp_problem_t& lp, f_t crossover_start = tic(); f_t work_estimate = 0; + csr_matrix_t Arow(m, n, 1); + lp.A.to_compressed_row(Arow); + settings.log.printf("\n"); settings.log.printf("Starting crossover\n"); @@ -1332,7 +1344,7 @@ crossover_status_t crossover(const lp_problem_t& lp, verify_basis(m, n, vstatus); compare_vstatus_with_lists(m, n, basic_list, nonbasic_list, vstatus); i_t dual_push_status = dual_push( - lp, settings, start_time, solution, ft, basic_list, nonbasic_list, superbasic_list, vstatus); + lp, Arow, settings, start_time, solution, ft, basic_list, nonbasic_list, superbasic_list, vstatus); if (dual_push_status < 0) { return return_to_status(dual_push_status); } settings.log.debug("basic list size %ld m %d\n", basic_list.size(), m); settings.log.debug("nonbasic list size %ld n - m %d\n", nonbasic_list.size(), n - m); diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 657ebc476..53bfcf8ac 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -30,7 +30,7 @@ struct element_t { f_t x; // coefficient value i_t next_in_column; // index of the next element in the column: kNone if there is no next element i_t next_in_row; // index of the next element in the row: kNone if there is no next element -}; +}; // 24 bytes constexpr int kNone = -1; template @@ -86,11 +86,11 @@ i_t load_elements(const csc_matrix_t& A, std::vector>& elements, std::vector& first_in_row, std::vector& first_in_col, + std::vector& last_in_row, f_t& work_estimate) { const i_t m = A.m; const i_t n = column_list.size(); - std::vector last_element_in_row(m, kNone); work_estimate += m; i_t nz = 0; @@ -105,15 +105,9 @@ i_t load_elements(const csc_matrix_t& A, elements[nz].x = A.x[p]; elements[nz].next_in_column = kNone; if (p > col_start) { elements[nz - 1].next_in_column = nz; } - elements[nz].next_in_row = kNone; // set the current next in row to None (since we don't know - // if there will be more entries in this row) - if (last_element_in_row[i] != kNone) { - // If we have seen an entry in this row before, set the last entry we've seen in this row to - // point to the current entry - elements[last_element_in_row[i]].next_in_row = nz; - } - // The current entry becomes the last element seen in the row - last_element_in_row[i] = nz; + elements[nz].next_in_row = kNone; + if (last_in_row[i] != kNone) { elements[last_in_row[i]].next_in_row = nz; } + last_in_row[i] = nz; if (p == col_start) { first_in_col[k] = nz; } if (first_in_row[i] == kNone) { first_in_row[i] = nz; } nz++; @@ -316,10 +310,11 @@ void update_Cdegree_and_col_count(i_t pivot_i, const std::vector& first_in_row, std::vector& Cdegree, std::vector>& col_count, + std::vector& col_pos, std::vector>& elements, f_t& work_estimate) { - // Update Cdegree and col_count + // Update Cdegree and col_count (O(1) removal using position array) i_t loop_count = 0; for (i_t p = first_in_row[pivot_i]; p != kNone; p = elements[p].next_in_row) { element_t* entry = &elements[p]; @@ -327,20 +322,20 @@ void update_Cdegree_and_col_count(i_t pivot_i, assert(entry->i == pivot_i); i_t cdeg = Cdegree[j]; assert(cdeg >= 0); - for (typename std::vector::iterator it = col_count[cdeg].begin(); - it != col_count[cdeg].end(); - it++) { - if (*it == j) { - // Remove col j from col_count[cdeg] - std::swap(*it, col_count[cdeg].back()); - col_count[cdeg].pop_back(); - work_estimate += (it - col_count[cdeg].begin()); - break; - } + // O(1) swap-with-last removal + { + i_t pos = col_pos[j]; + i_t other = col_count[cdeg].back(); + col_count[cdeg][pos] = other; + col_pos[other] = pos; + col_count[cdeg].pop_back(); } cdeg = --Cdegree[j]; assert(cdeg >= 0); - if (j != pivot_j && cdeg >= 0) { col_count[cdeg].push_back(j); } + if (j != pivot_j && cdeg >= 0) { + col_pos[j] = col_count[cdeg].size(); + col_count[cdeg].push_back(j); + } loop_count++; } work_estimate += 7 * loop_count; @@ -353,30 +348,31 @@ void update_Rdegree_and_row_count(i_t pivot_i, const std::vector& first_in_col, std::vector& Rdegree, std::vector>& row_count, + std::vector& row_pos, std::vector>& elements, f_t& work_estimate) { - // Update Rdegree and row_count + // Update Rdegree and row_count (O(1) removal using position array) i_t loop_count = 0; for (i_t p = first_in_col[pivot_j]; p != kNone; p = elements[p].next_in_column) { element_t* entry = &elements[p]; const i_t i = entry->i; i_t rdeg = Rdegree[i]; assert(rdeg >= 0); - for (typename std::vector::iterator it = row_count[rdeg].begin(); - it != row_count[rdeg].end(); - it++) { - if (*it == i) { - // Remove row i from row_count[rdeg] - std::swap(*it, row_count[rdeg].back()); - row_count[rdeg].pop_back(); - work_estimate += (it - row_count[rdeg].begin()); - break; - } + // O(1) swap-with-last removal + { + i_t pos = row_pos[i]; + i_t other = row_count[rdeg].back(); + row_count[rdeg][pos] = other; + row_pos[other] = pos; + row_count[rdeg].pop_back(); } rdeg = --Rdegree[i]; assert(rdeg >= 0); - if (i != pivot_i && rdeg >= 0) { row_count[rdeg].push_back(i); } + if (i != pivot_i && rdeg >= 0) { + row_pos[i] = row_count[rdeg].size(); + row_count[rdeg].push_back(i); + } loop_count++; } work_estimate += 7 * loop_count; @@ -400,18 +396,15 @@ void schur_complement(i_t pivot_i, std::vector& Cdegree, std::vector>& row_count, std::vector>& col_count, + std::vector& last_in_row, + std::vector& col_pos, + std::vector& row_pos, std::vector>& elements, f_t& work_estimate) { + // Initialize row_last_workspace from last_in_row (O(1) per row, no full row traversal) for (i_t p1 = first_in_col[pivot_j]; p1 != kNone; p1 = elements[p1].next_in_column) { - element_t* e = &elements[p1]; - const i_t i = e->i; - i_t row_last = kNone; - for (i_t p3 = first_in_row[i]; p3 != kNone; p3 = elements[p3].next_in_row) { - row_last = p3; - } - work_estimate += 2 * Rdegree[i]; - row_last_workspace[i] = row_last; + row_last_workspace[elements[p1].i] = last_in_row[elements[p1].i]; } work_estimate += 4 * Cdegree[pivot_j]; @@ -478,35 +471,29 @@ void schur_complement(i_t pivot_i, first_in_row[i] = fill_p; } row_last_workspace[i] = fill_p; - i_t rdeg = Rdegree[i]; // Rdgree must increase - for (typename std::vector::iterator it = row_count[rdeg].begin(); - it != row_count[rdeg].end(); - it++) { - if (*it == i) { - // Remove row i from row_count[rdeg] - std::swap(*it, row_count[rdeg].back()); - row_count[rdeg].pop_back(); - work_estimate += 2 * (it - row_count[rdeg].begin()); - break; - } + last_in_row[i] = fill_p; // maintain last_in_row persistent state + // Row degree update: O(1) removal using row_pos + { + i_t rdeg = Rdegree[i]; + i_t pos = row_pos[i]; + i_t other = row_count[rdeg].back(); + row_count[rdeg][pos] = other; + row_pos[other] = pos; + row_count[rdeg].pop_back(); + row_pos[i] = row_count[rdeg + 1].size(); + row_count[++Rdegree[i]].push_back(i); } - rdeg = ++Rdegree[i]; // Increase rdeg - row_count[rdeg].push_back(i); // Add row i to row_count[rdeg] - - i_t cdeg = Cdegree[j]; // Cdegree must increase - for (typename std::vector::iterator it = col_count[cdeg].begin(); - it != col_count[cdeg].end(); - it++) { - if (*it == j) { - // Remove col j from col_count[cdeg] - std::swap(*it, col_count[cdeg].back()); - col_count[cdeg].pop_back(); - work_estimate += 2 * (it - col_count[cdeg].begin()); - break; - } + // Col degree update: O(1) removal using col_pos + { + i_t cdeg = Cdegree[j]; + i_t pos = col_pos[j]; + i_t other = col_count[cdeg].back(); + col_count[cdeg][pos] = other; + col_pos[other] = pos; + col_count[cdeg].pop_back(); + col_pos[j] = col_count[cdeg + 1].size(); + col_count[++Cdegree[j]].push_back(j); } - cdeg = ++Cdegree[j]; // Increase Cdegree - col_count[cdeg].push_back(j); // Add column j to col_count[cdeg] } } work_estimate += 10 * Cdegree[pivot_j]; @@ -532,7 +519,6 @@ void remove_pivot_row(i_t pivot_i, f_t& work_estimate) { // Remove the pivot row - i_t row_loop_count = 0; for (i_t p0 = first_in_row[pivot_i]; p0 != kNone; p0 = elements[p0].next_in_row) { element_t* e = &elements[p0]; @@ -574,6 +560,7 @@ void remove_pivot_col(i_t pivot_i, std::vector& first_in_col, std::vector& first_in_row, std::vector& max_in_row, + std::vector& last_in_row, std::vector>& elements, f_t& work_estimate) { @@ -583,6 +570,7 @@ void remove_pivot_col(i_t pivot_i, element_t* e = &elements[p1]; const i_t i = e->i; i_t last = kNone; + i_t last_surviving = kNone; #ifdef THRESHOLD_ROOK_PIVOTING f_t max_in_row_i = 0.0; #endif @@ -598,16 +586,17 @@ void remove_pivot_col(i_t pivot_i, entry->i = -1; entry->j = -1; entry->x = std::numeric_limits::quiet_NaN(); - } + } else { + last_surviving = p; #ifdef THRESHOLD_ROOK_PIVOTING - else { const f_t abs_entryx = std::abs(entry->x); if (abs_entryx > max_in_row_i) { max_in_row_i = abs_entryx; } - } #endif + } last = p; row_loop_count++; } + last_in_row[i] = last_surviving; work_estimate += 3 * row_loop_count; #ifdef THRESHOLD_ROOK_PIVOTING max_in_row[i] = max_in_row_i; @@ -656,11 +645,28 @@ i_t right_looking_lu(const csc_matrix_t& A, const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); + + // Position arrays for O(1) degree-bucket removal + std::vector col_pos(n); + for (i_t d = 0; d <= n; ++d) { + for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { + col_pos[col_count[d][pos]] = pos; + } + } + std::vector row_pos(n); + for (i_t d = 0; d <= n; ++d) { + for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { + row_pos[row_count[d][pos]] = pos; + } + } + std::vector> elements(Bnz); std::vector first_in_row(n, kNone); std::vector first_in_col(n, kNone); + std::vector last_in_row(n, kNone); work_estimate += 2 * n + Bnz; - load_elements(A, column_list, Bnz, elements, first_in_row, first_in_col, work_estimate); + load_elements( + A, column_list, Bnz, elements, first_in_row, first_in_col, last_in_row, work_estimate); std::vector column_j_workspace(n, kNone); std::vector row_last_workspace(n); @@ -777,9 +783,9 @@ i_t right_looking_lu(const csc_matrix_t& A, // Update Cdegree and col_count update_Cdegree_and_col_count( - pivot_i, pivot_j, first_in_row, Cdegree, col_count, elements, work_estimate); + pivot_i, pivot_j, first_in_row, Cdegree, col_count, col_pos, elements, work_estimate); update_Rdegree_and_row_count( - pivot_i, pivot_j, first_in_col, Rdegree, row_count, elements, work_estimate); + pivot_i, pivot_j, first_in_col, Rdegree, row_count, row_pos, elements, work_estimate); // A22 <- A22 - l u^T schur_complement(pivot_i, @@ -798,14 +804,23 @@ i_t right_looking_lu(const csc_matrix_t& A, Cdegree, row_count, col_count, + last_in_row, + col_pos, + row_pos, elements, work_estimate); // Remove the pivot row remove_pivot_row( pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements, work_estimate); - remove_pivot_col( - pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements, work_estimate); + remove_pivot_col(pivot_i, + pivot_j, + first_in_col, + first_in_row, + max_in_row, + last_in_row, + elements, + work_estimate); // Set pivot entry to sentinel value pivot_entry->i = -1; @@ -1030,10 +1045,28 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); + + // Position arrays for O(1) degree-bucket removal + // col_count has m+1 buckets, row_count has n+1 buckets + std::vector col_pos(n); + for (i_t d = 0; d <= m; ++d) { + for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { + col_pos[col_count[d][pos]] = pos; + } + } + std::vector row_pos(m); + for (i_t d = 0; d <= n; ++d) { + for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { + row_pos[row_count[d][pos]] = pos; + } + } + std::vector> elements(Bnz); std::vector first_in_row(m, kNone); std::vector first_in_col(n, kNone); - load_elements(A, column_list, Bnz, elements, first_in_row, first_in_col, work_estimate); + std::vector last_in_row(m, kNone); + load_elements( + A, column_list, Bnz, elements, first_in_row, first_in_col, last_in_row, work_estimate); std::vector column_j_workspace(m, kNone); std::vector row_last_workspace(m); @@ -1100,9 +1133,9 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, // Update Cdegree and col_count update_Cdegree_and_col_count( - pivot_i, pivot_j, first_in_row, Cdegree, col_count, elements, work_estimate); + pivot_i, pivot_j, first_in_row, Cdegree, col_count, col_pos, elements, work_estimate); update_Rdegree_and_row_count( - pivot_i, pivot_j, first_in_col, Rdegree, row_count, elements, work_estimate); + pivot_i, pivot_j, first_in_col, Rdegree, row_count, row_pos, elements, work_estimate); // A22 <- A22 - l u^T schur_complement(pivot_i, @@ -1121,14 +1154,23 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, Cdegree, row_count, col_count, + last_in_row, + col_pos, + row_pos, elements, work_estimate); // Remove the pivot row remove_pivot_row( pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements, work_estimate); - remove_pivot_col( - pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements, work_estimate); + remove_pivot_col(pivot_i, + pivot_j, + first_in_col, + first_in_row, + max_in_row, + last_in_row, + elements, + work_estimate); // Set pivot entry to sentinel value pivot_entry->i = -1;