From 5fdd95406a0f412fa22006308d6f03e8fa31c586 Mon Sep 17 00:00:00 2001 From: haochong zhang Date: Wed, 20 May 2026 08:53:36 +0000 Subject: [PATCH 1/3] feat(io): extend out_mat_hs2 to support all 4 CSR sparse output formats - Extended out_mat_hs2 to map integers 0 to 4 (disabled, single-file text/binary, and multi-file text/binary). - Maintained backward compatibility with string boolean inputs ('true', 'false', 'yes', 'no'). - Added support for rank suffixes (.0, .1, etc.) for multi-file CSR and position matrix outputs when reduce is false. - Refactored output_HSR to use a clean templated loop structure, eliminating duplicate code blocks. - Optimized parallel CSR output performance using in-memory collective MPI gathering (MPI_Gatherv), replacing slow disk-based row-by-row gathering. --- .../source_io/module_hs/cal_r_overlap_R.cpp | 79 ++++-- source/source_io/module_hs/cal_r_overlap_R.h | 4 +- .../source_io/module_hs/output_mat_sparse.cpp | 26 +- source/source_io/module_hs/single_R_io.cpp | 233 +++++++++++++----- source/source_io/module_hs/write_HS_R.cpp | 206 +++++++++++++--- source/source_io/module_hs/write_HS_R.h | 17 +- .../source_io/module_hs/write_HS_sparse.cpp | 44 ++-- source/source_io/module_hs/write_HS_sparse.h | 4 +- .../source_io/module_parameter/input_conv.cpp | 2 +- .../read_input_item_output.cpp | 15 +- 10 files changed, 473 insertions(+), 157 deletions(-) diff --git a/source/source_io/module_hs/cal_r_overlap_R.cpp b/source/source_io/module_hs/cal_r_overlap_R.cpp index f75570a4116..15c5f00c2f5 100644 --- a/source/source_io/module_hs/cal_r_overlap_R.cpp +++ b/source/source_io/module_hs/cal_r_overlap_R.cpp @@ -595,7 +595,7 @@ void cal_r_overlap_R::get_psi_r_beta(const UnitCell& ucell, } -void cal_r_overlap_R::out_rR(const UnitCell& ucell, const Grid_Driver& gd, const int& istep) +void cal_r_overlap_R::out_rR(const UnitCell& ucell, const Grid_Driver& gd, const int& istep, const bool& reduce) { ModuleBase::TITLE("cal_r_overlap_R", "out_rR"); ModuleBase::timer::start("cal_r_overlap_R", "out_rR"); @@ -631,10 +631,14 @@ void cal_r_overlap_R::out_rR(const UnitCell& ucell, const Grid_Driver& gd, const std::stringstream tem1; tem1 << PARAM.globalv.global_out_dir << "tmp-rr.csr"; + if (!reduce && GlobalV::NPROC > 1) + { + tem1 << "." << GlobalV::DRANK; + } std::ofstream ofs_tem1; std::ifstream ifs_tem1; - if (GlobalV::DRANK == 0) + if (!reduce || GlobalV::DRANK == 0) { if (binary) { @@ -754,26 +758,32 @@ void cal_r_overlap_R::out_rR(const UnitCell& ucell, const Grid_Driver& gd, const } } - Parallel_Reduce::reduce_all(rR_nonzero_num, 3); + if (reduce) + { + Parallel_Reduce::reduce_all(rR_nonzero_num, 3); + } if (rR_nonzero_num[0] || rR_nonzero_num[1] || rR_nonzero_num[2]) { output_R_number++; - if (binary) - { - ofs_tem1.write(reinterpret_cast(&dRx), sizeof(int)); - ofs_tem1.write(reinterpret_cast(&dRy), sizeof(int)); - ofs_tem1.write(reinterpret_cast(&dRz), sizeof(int)); - } - else + if (!reduce || GlobalV::DRANK == 0) { - ofs_tem1 << dRx << " " << dRy << " " << dRz << std::endl; + if (binary) + { + ofs_tem1.write(reinterpret_cast(&dRx), sizeof(int)); + ofs_tem1.write(reinterpret_cast(&dRy), sizeof(int)); + ofs_tem1.write(reinterpret_cast(&dRz), sizeof(int)); + } + else + { + ofs_tem1 << dRx << " " << dRy << " " << dRz << std::endl; + } } for (int direction = 0; direction < 3; ++direction) { - if (GlobalV::DRANK == 0) + if (!reduce || GlobalV::DRANK == 0) { if (binary) { @@ -791,7 +801,8 @@ void cal_r_overlap_R::out_rR(const UnitCell& ucell, const Grid_Driver& gd, const psi_r_psi_sparse[direction], sparse_threshold, binary, - *(this->ParaV)); + *(this->ParaV), + reduce); } else { @@ -801,7 +812,7 @@ void cal_r_overlap_R::out_rR(const UnitCell& ucell, const Grid_Driver& gd, const } } - if (GlobalV::DRANK == 0) + if (!reduce || GlobalV::DRANK == 0) { std::ofstream out_r; std::stringstream ssr; @@ -814,6 +825,10 @@ void cal_r_overlap_R::out_rR(const UnitCell& ucell, const Grid_Driver& gd, const { ssr << PARAM.globalv.global_out_dir << "rr.csr"; } + if (!reduce && GlobalV::NPROC > 1) + { + ssr << "." << GlobalV::DRANK; + } if (binary) // .dat { @@ -864,7 +879,7 @@ void cal_r_overlap_R::out_rR(const UnitCell& ucell, const Grid_Driver& gd, const return; } -void cal_r_overlap_R::out_rR_other(const UnitCell& ucell, const int& istep, const std::set>& output_R_coor) +void cal_r_overlap_R::out_rR_other(const UnitCell& ucell, const int& istep, const std::set>& output_R_coor, const bool& reduce) { ModuleBase::TITLE("cal_r_overlap_R", "out_rR_other"); ModuleBase::timer::start("cal_r_overlap_R", "out_rR_other"); @@ -887,8 +902,12 @@ void cal_r_overlap_R::out_rR_other(const UnitCell& ucell, const int& istep, cons { ssr << PARAM.globalv.global_out_dir << "rr.csr"; } + if (!reduce && GlobalV::NPROC > 1) + { + ssr << "." << GlobalV::DRANK; + } - if (GlobalV::DRANK == 0) + if (!reduce || GlobalV::DRANK == 0) { if (binary) { @@ -1030,22 +1049,28 @@ void cal_r_overlap_R::out_rR_other(const UnitCell& ucell, const int& istep, cons } } - Parallel_Reduce::reduce_all(rR_nonzero_num, 3); - - if (binary) // .dat + if (reduce) { - out_r.write(reinterpret_cast(&dRx), sizeof(int)); - out_r.write(reinterpret_cast(&dRy), sizeof(int)); - out_r.write(reinterpret_cast(&dRz), sizeof(int)); + Parallel_Reduce::reduce_all(rR_nonzero_num, 3); } - else // .txt + + if (!reduce || GlobalV::DRANK == 0) { - out_r << dRx << " " << dRy << " " << dRz << std::endl; + if (binary) // .dat + { + out_r.write(reinterpret_cast(&dRx), sizeof(int)); + out_r.write(reinterpret_cast(&dRy), sizeof(int)); + out_r.write(reinterpret_cast(&dRz), sizeof(int)); + } + else // .txt + { + out_r << dRx << " " << dRy << " " << dRz << std::endl; + } } for (int direction = 0; direction < 3; ++direction) { - if (GlobalV::DRANK == 0) + if (!reduce || GlobalV::DRANK == 0) { if (binary) { @@ -1059,7 +1084,7 @@ void cal_r_overlap_R::out_rR_other(const UnitCell& ucell, const int& istep, cons if (rR_nonzero_num[direction]) { - ModuleIO::output_single_R(out_r, psi_r_psi_sparse[direction], sparse_threshold, binary, *(this->ParaV)); + ModuleIO::output_single_R(out_r, psi_r_psi_sparse[direction], sparse_threshold, binary, *(this->ParaV), reduce); } else { @@ -1068,7 +1093,7 @@ void cal_r_overlap_R::out_rR_other(const UnitCell& ucell, const int& istep, cons } } - if (GlobalV::DRANK == 0) + if (!reduce || GlobalV::DRANK == 0) { out_r.close(); } diff --git a/source/source_io/module_hs/cal_r_overlap_R.h b/source/source_io/module_hs/cal_r_overlap_R.h index 2329cd4ea85..bc643e7f53f 100644 --- a/source/source_io/module_hs/cal_r_overlap_R.h +++ b/source/source_io/module_hs/cal_r_overlap_R.h @@ -56,8 +56,8 @@ class cal_r_overlap_R const ModuleBase::Vector3& R2, const int& T2 ); - void out_rR(const UnitCell& ucell, const Grid_Driver& gd, const int& istep); - void out_rR_other(const UnitCell& ucell, const int& istep, const std::set>& output_R_coor); + void out_rR(const UnitCell& ucell, const Grid_Driver& gd, const int& istep, const bool& reduce = true); + void out_rR_other(const UnitCell& ucell, const int& istep, const std::set>& output_R_coor, const bool& reduce = true); private: void initialize_orb_table(const UnitCell& ucell, const LCAO_Orbitals& orb); diff --git a/source/source_io/module_hs/output_mat_sparse.cpp b/source/source_io/module_hs/output_mat_sparse.cpp index 9f31352b432..189ed7b6520 100644 --- a/source/source_io/module_hs/output_mat_sparse.cpp +++ b/source/source_io/module_hs/output_mat_sparse.cpp @@ -2,6 +2,7 @@ #include "cal_r_overlap_R.h" #include "source_io/module_hs/write_HS_R.h" +#include "source_io/module_parameter/parameter.h" namespace ModuleIO { @@ -23,10 +24,13 @@ void output_mat_sparse(const bool& out_mat_dh, { LCAO_HS_Arrays HS_Arrays; // store sparse arrays + const int out_mat_hsR = PARAM.inp.out_mat_hs2[0]; + const bool reduce = (out_mat_hsR != 3 && out_mat_hsR != 4); + const bool binary = (out_mat_hsR == 2 || out_mat_hsR == 4); //! generate a file containing the kinetic energy matrix if (out_mat_t) { - output_TR(istep, ucell, pv, HS_Arrays, grid, two_center_bundle, orb); + output_TR(istep, ucell, pv, HS_Arrays, grid, two_center_bundle, orb, "trs1_nao.csr", binary, 1e-10, reduce); } //! generate a file containing the derivatives of the Hamiltonian matrix (in Ry/Bohr) @@ -40,7 +44,10 @@ void output_mat_sparse(const bool& out_mat_dh, grid, two_center_bundle, orb, - kv); + kv, + binary, + 1e-10, + reduce); } //! generate a file containing the derivatives of the overlap matrix (in Ry/Bohr) if (out_mat_ds) @@ -52,7 +59,10 @@ void output_mat_sparse(const bool& out_mat_dh, grid, two_center_bundle, orb, - kv); + kv, + binary, + 1e-10, + reduce); } // add by jingan for out r_R matrix 2019.8.14 @@ -60,7 +70,15 @@ void output_mat_sparse(const bool& out_mat_dh, { cal_r_overlap_R r_matrix; r_matrix.init(ucell, pv, orb); - r_matrix.out_rR(ucell, grid, istep); + r_matrix.binary = binary; + if (out_mat_hsR != 0) + { + r_matrix.out_rR_other(ucell, istep, HS_Arrays.output_R_coor, reduce); + } + else + { + r_matrix.out_rR(ucell, grid, istep, reduce); + } } return; diff --git a/source/source_io/module_hs/single_R_io.cpp b/source/source_io/module_hs/single_R_io.cpp index bba94c345ee..be7a85cb5ca 100644 --- a/source/source_io/module_hs/single_R_io.cpp +++ b/source/source_io/module_hs/single_R_io.cpp @@ -3,10 +3,15 @@ #include "source_io/module_parameter/parameter.h" #include "source_base/global_function.h" #include "source_base/global_variable.h" +#include +#include +#include +#include +#include inline void write_data(std::ofstream& ofs, const double& data) { - ofs << " " << std::fixed << std::scientific << std::setprecision(16) << data; + ofs << " " << data; } inline void write_data(std::ofstream& ofs, const std::complex& data) { @@ -21,109 +26,211 @@ void ModuleIO::output_single_R(std::ofstream& ofs, const Parallel_Orbitals& pv, const bool& reduce) { - T* line = nullptr; - std::vector indptr; - indptr.reserve(PARAM.globalv.nlocal + 1); - indptr.push_back(0); + // Step 1: Collect non-zero elements local to this processor from XR + std::vector local_rows; + std::vector local_cols; + std::vector local_vals; - std::stringstream tem1; - tem1 << PARAM.globalv.global_out_dir << std::to_string(GlobalV::DRANK) + "temp_sparse_indices.dat"; - std::ofstream ofs_tem1; - std::ifstream ifs_tem1; - - if (!reduce || GlobalV::DRANK == 0) + for (auto const& row_pair : XR) { - if (binary) - { - ofs_tem1.open(tem1.str().c_str(), std::ios::binary); - } - else + auto const& row = row_pair.first; + for (auto const& col_pair : row_pair.second) { - ofs_tem1.open(tem1.str().c_str()); + auto const& col = col_pair.first; + auto const& val = col_pair.second; + if (std::abs(val) > sparse_threshold) + { + local_rows.push_back(row); + local_cols.push_back(col); + local_vals.push_back(val); + } } } - line = new T[PARAM.globalv.nlocal]; - for(int row = 0; row < PARAM.globalv.nlocal; ++row) + // Step 2: Handle gathering +#ifdef __MPI + if (reduce && GlobalV::NPROC > 1) { - ModuleBase::GlobalFunc::ZEROS(line, PARAM.globalv.nlocal); + int local_size = local_rows.size(); + std::vector recvcounts; + std::vector displs; + int total_size = 0; - if (!reduce || pv.global2local_row(row) >= 0) + if (GlobalV::DRANK == 0) { - auto iter = XR.find(row); - if (iter != XR.end()) + recvcounts.resize(GlobalV::NPROC); + } + + MPI_Gather(&local_size, 1, MPI_INT, recvcounts.data(), 1, MPI_INT, 0, MPI_COMM_WORLD); + + std::vector global_rows; + std::vector global_cols; + std::vector global_vals; + + if (GlobalV::DRANK == 0) + { + displs.resize(GlobalV::NPROC); + displs[0] = 0; + for (int i = 0; i < GlobalV::NPROC; ++i) { - for (auto &value : iter->second) + total_size += recvcounts[i]; + if (i > 0) { - line[value.first] = value.second; + displs[i] = displs[i - 1] + recvcounts[i - 1]; } } + global_rows.resize(total_size); + global_cols.resize(total_size); + global_vals.resize(total_size); } - if (reduce) - { - Parallel_Reduce::reduce_all(line, PARAM.globalv.nlocal); - } + MPI_Datatype mpi_type; + if (std::is_same::value) + { + mpi_type = MPI_DOUBLE; + } + else + { + mpi_type = MPI_DOUBLE_COMPLEX; + } - if (!reduce || GlobalV::DRANK == 0) + MPI_Gatherv(local_rows.data(), local_size, MPI_INT, + global_rows.data(), recvcounts.data(), displs.data(), MPI_INT, + 0, MPI_COMM_WORLD); + MPI_Gatherv(local_cols.data(), local_size, MPI_INT, + global_cols.data(), recvcounts.data(), displs.data(), MPI_INT, + 0, MPI_COMM_WORLD); + MPI_Gatherv(local_vals.data(), local_size, mpi_type, + global_vals.data(), recvcounts.data(), displs.data(), mpi_type, + 0, MPI_COMM_WORLD); + + if (GlobalV::DRANK == 0) { - long long nonzeros_count = 0; - for (int col = 0; col < PARAM.globalv.nlocal; ++col) + std::vector>> gathered_rows(PARAM.globalv.nlocal); + for (int i = 0; i < total_size; ++i) + { + gathered_rows[global_rows[i]].push_back({global_cols[i], global_vals[i]}); + } + + for (int r = 0; r < PARAM.globalv.nlocal; ++r) { - if (std::abs(line[col]) > sparse_threshold) + std::sort(gathered_rows[r].begin(), gathered_rows[r].end(), + [](const std::pair& a, const std::pair& b) { return a.first < b.first; }); + } + + std::vector col_indices; + std::vector values; + std::vector indptr(PARAM.globalv.nlocal + 1); + indptr[0] = 0; + + for (int r = 0; r < PARAM.globalv.nlocal; ++r) + { + for (auto const& col_pair : gathered_rows[r]) { - if (binary) - { - ofs.write(reinterpret_cast(&line[col]), sizeof(T)); - ofs_tem1.write(reinterpret_cast(&col), sizeof(int)); - } - else - { - write_data(ofs, line[col]); - ofs_tem1 << " " << col; - } - - nonzeros_count++; + col_indices.push_back(col_pair.first); + values.push_back(col_pair.second); + } + indptr[r + 1] = col_indices.size(); + } + // Write to file + if (binary) + { + if (!values.empty()) + { + ofs.write(reinterpret_cast(values.data()), values.size() * sizeof(T)); + ofs.write(reinterpret_cast(col_indices.data()), col_indices.size() * sizeof(int)); } + ofs.write(reinterpret_cast(indptr.data()), indptr.size() * sizeof(long long)); + } + else + { + ofs << std::fixed << std::scientific << std::setprecision(8); + for (auto const& val : values) + { + write_data(ofs, val); + } + ofs << std::endl; + + for (auto const& col : col_indices) + { + ofs << " " << col; + } + ofs << std::endl; + for (auto const& i : indptr) + { + ofs << " " << i; + } + ofs << std::endl; } - nonzeros_count += indptr.back(); - indptr.push_back(nonzeros_count); } + return; } +#endif - delete[] line; - + // Serial mode or no reduce if (!reduce || GlobalV::DRANK == 0) { + // Reconstruct local rows and construct CSR directly + std::vector>> gathered_rows(PARAM.globalv.nlocal); + for (size_t i = 0; i < local_rows.size(); ++i) + { + gathered_rows[local_rows[i]].push_back({local_cols[i], local_vals[i]}); + } + + for (int r = 0; r < PARAM.globalv.nlocal; ++r) + { + std::sort(gathered_rows[r].begin(), gathered_rows[r].end(), + [](const std::pair& a, const std::pair& b) { return a.first < b.first; }); + } + + std::vector col_indices; + std::vector values; + std::vector indptr(PARAM.globalv.nlocal + 1); + indptr[0] = 0; + + for (int r = 0; r < PARAM.globalv.nlocal; ++r) + { + for (auto const& col_pair : gathered_rows[r]) + { + col_indices.push_back(col_pair.first); + values.push_back(col_pair.second); + } + indptr[r + 1] = col_indices.size(); + } + + // Write to file if (binary) { - ofs_tem1.close(); - ifs_tem1.open(tem1.str().c_str(), std::ios::binary); - ofs << ifs_tem1.rdbuf(); - ifs_tem1.close(); - for (auto &i : indptr) + if (!values.empty()) { - ofs.write(reinterpret_cast(&i), sizeof(long long)); + ofs.write(reinterpret_cast(values.data()), values.size() * sizeof(T)); + ofs.write(reinterpret_cast(col_indices.data()), col_indices.size() * sizeof(int)); } + ofs.write(reinterpret_cast(indptr.data()), indptr.size() * sizeof(long long)); } else { + ofs << std::fixed << std::scientific << std::setprecision(8); + for (auto const& val : values) + { + write_data(ofs, val); + } ofs << std::endl; - ofs_tem1 << std::endl; - ofs_tem1.close(); - ifs_tem1.open(tem1.str().c_str()); - ofs << ifs_tem1.rdbuf(); - ifs_tem1.close(); - for (auto &i : indptr) + + for (auto const& col : col_indices) + { + ofs << " " << col; + } + ofs << std::endl; + + for (auto const& i : indptr) { ofs << " " << i; } ofs << std::endl; } - - std::remove(tem1.str().c_str()); } } diff --git a/source/source_io/module_hs/write_HS_R.cpp b/source/source_io/module_hs/write_HS_R.cpp index 2a16db86d1f..f6c7db7f17d 100644 --- a/source/source_io/module_hs/write_HS_R.cpp +++ b/source/source_io/module_hs/write_HS_R.cpp @@ -41,6 +41,7 @@ hamilt::HContainer* sparse_map_to_hcontainer( // If the absolute value of the matrix element is less than or equal to the // 'sparse_thr', it will be ignored. + void ModuleIO::output_dSR(const int& istep, const UnitCell& ucell, const Parallel_Orbitals& pv, @@ -50,7 +51,8 @@ void ModuleIO::output_dSR(const int& istep, const LCAO_Orbitals& orb, const K_Vectors& kv, const bool& binary, - const double& sparse_thr) + const double& sparse_thr, + const bool& reduce) { ModuleBase::TITLE("ModuleIO", "output_dSR"); ModuleBase::timer::start("ModuleIO", "output_dSR"); @@ -58,7 +60,7 @@ void ModuleIO::output_dSR(const int& istep, sparse_format::cal_dS(ucell, pv, HS_Arrays, grid, two_center_bundle, orb, sparse_thr); // mohan update 2024-04-01 - ModuleIO::save_dH_sparse(istep, pv, HS_Arrays, sparse_thr, binary, "s"); + ModuleIO::save_dH_sparse(istep, pv, HS_Arrays, sparse_thr, binary, "s", reduce); sparse_format::destroy_dH_R_sparse(HS_Arrays); @@ -76,7 +78,8 @@ void ModuleIO::output_dHR(const int& istep, const LCAO_Orbitals& orb, const K_Vectors& kv, const bool& binary, - const double& sparse_thr) + const double& sparse_thr, + const bool& reduce) { ModuleBase::TITLE("ModuleIO", "output_dHR"); ModuleBase::timer::start("ModuleIO", "output_dHR"); @@ -104,7 +107,7 @@ void ModuleIO::output_dHR(const int& istep, } } // mohan update 2024-04-01 - ModuleIO::save_dH_sparse(istep, pv, HS_Arrays, sparse_thr, binary); + ModuleIO::save_dH_sparse(istep, pv, HS_Arrays, sparse_thr, binary, "h", reduce); sparse_format::destroy_dH_R_sparse(HS_Arrays); @@ -118,7 +121,8 @@ void ModuleIO::output_SR(Parallel_Orbitals& pv, hamilt::Hamilt* p_ham, const std::string& SR_filename, const bool& binary, - const double& sparse_thr) + const double& sparse_thr, + const bool& reduce) { ModuleBase::TITLE("ModuleIO", "output_SR"); ModuleBase::timer::start("ModuleIO", "output_SR"); @@ -153,7 +157,8 @@ void ModuleIO::output_SR(Parallel_Orbitals& pv, SR_filename, pv, "S", - istep); + istep, + reduce); } else { @@ -164,7 +169,8 @@ void ModuleIO::output_SR(Parallel_Orbitals& pv, SR_filename, pv, "S", - istep); + istep, + reduce); } sparse_format::destroy_HS_R_sparse(HS_Arrays); @@ -182,7 +188,8 @@ void ModuleIO::output_TR(const int istep, const LCAO_Orbitals& orb, const std::string& TR_filename, const bool& binary, - const double& sparse_thr) + const double& sparse_thr, + const bool& reduce) { ModuleBase::TITLE("ModuleIO", "output_TR"); ModuleBase::timer::start("ModuleIO", "output_TR"); @@ -214,7 +221,8 @@ void ModuleIO::output_TR(const int istep, sst.str().c_str(), pv, "T", - istep); + istep, + reduce); sparse_format::destroy_T_R_sparse(HS_Arrays); @@ -222,18 +230,21 @@ void ModuleIO::output_TR(const int istep, return; } + template void ModuleIO::output_SR(Parallel_Orbitals& pv, const Grid_Driver& grid, hamilt::Hamilt* p_ham, const std::string& SR_filename, const bool& binary, - const double& sparse_thr); + const double& sparse_thr, + const bool& reduce); template void ModuleIO::output_SR>(Parallel_Orbitals& pv, const Grid_Driver& grid, hamilt::Hamilt>* p_ham, const std::string& SR_filename, const bool& binary, - const double& sparse_thr); + const double& sparse_thr, + const bool& reduce); #include "source_lcao/module_hcontainer/hcontainer_funcs.h" #include "source_lcao/module_hcontainer/output_hcontainer.h" @@ -276,17 +287,20 @@ void ModuleIO::write_hcontainer_csr(const std::string& fname, const int istep, const int ispin, const int nspin, - const std::string& label) + const std::string& label, + const bool& binary) { std::ofstream ofs; - if (istep <= 0) + std::ios::openmode mode = std::ios::out; + if (istep > 0) { - ofs.open(fname); + mode |= std::ios::app; } - else + if (binary) { - ofs.open(fname, std::ios::app); + mode |= std::ios::binary; } + ofs.open(fname, mode); ofs << " --- Ionic Step " << istep + 1 << " ---" << std::endl; ofs << " # print " << label << " matrix in real space " << label << "(R)" << std::endl; @@ -300,8 +314,94 @@ void ModuleIO::write_hcontainer_csr(const std::string& fname, ofs << std::endl; const double sparse_threshold = 1e-10; - hamilt::Output_HContainer out(mat_serial, ofs, sparse_threshold, precision); - out.write(); + if (!binary) + { + hamilt::Output_HContainer out(mat_serial, ofs, sparse_threshold, precision); + out.write(); + } + else + { + int size_for_loop_R = mat_serial->size_R_loop(); + int rx = 0; + int ry = 0; + int rz = 0; + int R_range[2] = {0, 0}; + + for (int iR = 0; iR < size_for_loop_R; iR++) + { + mat_serial->loop_R(iR, rx, ry, rz); + int max_R = std::max({rx, ry, rz}); + int min_R = std::min({rx, ry, rz}); + if (max_R > R_range[1]) R_range[1] = max_R; + if (min_R < R_range[0]) R_range[0] = min_R; + } + + for (int ix = R_range[0]; ix <= R_range[1]; ix++) + { + for (int iy = R_range[0]; iy <= R_range[1]; iy++) + { + for (int iz = R_range[0]; iz <= R_range[1]; iz++) + { + if (mat_serial->find_R(ix, iy, iz) != -1) + { + mat_serial->fix_R(ix, iy, iz); + ModuleIO::SparseMatrix sparse_matrix(mat_serial->get_nbasis(), mat_serial->get_nbasis()); + sparse_matrix.setSparseThreshold(sparse_threshold); + + for (int iap = 0; iap < mat_serial->size_atom_pairs(); ++iap) + { + auto atom_pair = mat_serial->get_atom_pair(iap); + auto tmp_matrix_info = atom_pair.get_matrix_values(); + int* tmp_index = std::get<0>(tmp_matrix_info).data(); + TR* tmp_data = std::get<1>(tmp_matrix_info); + for (int irow = tmp_index[0]; irow < tmp_index[0] + tmp_index[1]; ++irow) + { + for (int icol = tmp_index[2]; icol < tmp_index[2] + tmp_index[3]; ++icol) + { + sparse_matrix.insert(irow, icol, *tmp_data); + tmp_data++; + } + } + } + + ofs.write(reinterpret_cast(&ix), sizeof(int)); + ofs.write(reinterpret_cast(&iy), sizeof(int)); + ofs.write(reinterpret_cast(&iz), sizeof(int)); + int nnz = sparse_matrix.getNNZ(); + ofs.write(reinterpret_cast(&nnz), sizeof(int)); + + std::vector col_indices; + std::vector values; + std::vector indptr(mat_serial->get_nbasis() + 1, 0); + + for (const auto& elem : sparse_matrix.getElements()) + { + int r = elem.first.first; + int c = elem.first.second; + TR val = elem.second; + values.push_back(val); + col_indices.push_back(c); + indptr[r + 1]++; + } + + for (int i = 0; i < mat_serial->get_nbasis(); ++i) + { + indptr[i + 1] += indptr[i]; + } + + if (nnz != 0) + { + ofs.write(reinterpret_cast(values.data()), values.size() * sizeof(TR)); + ofs.write(reinterpret_cast(col_indices.data()), col_indices.size() * sizeof(int)); + } + ofs.write(reinterpret_cast(indptr.data()), indptr.size() * sizeof(long long)); + + mat_serial->unfix_R(); + } + } + } + } + } ofs.close(); } @@ -319,27 +419,44 @@ void ModuleIO::write_hsr(const std::vector*>& hr_vec, const int nspin = hr_vec.size(); assert(nspin > 0); + const int format = PARAM.inp.out_mat_hs2[0]; + const bool reduce = (format != 3 && format != 4); + const bool binary = (format == 2 || format == 4); + // Output HR (one file per spin) for (int ispin = 0; ispin < nspin; ispin++) { const int nbasis = hr_vec[ispin]->get_nbasis(); + if (reduce) + { #ifdef __MPI - Parallel_Orbitals serialV; - serialV.init(nbasis, nbasis, nbasis, paraV.comm()); - serialV.set_serial(nbasis, nbasis); - serialV.set_atomic_trace(iat2iwt, nat, nbasis); - hamilt::HContainer hr_serial(&serialV); - hamilt::gatherParallels(*hr_vec[ispin], &hr_serial, 0); + Parallel_Orbitals serialV; + serialV.init(nbasis, nbasis, nbasis, paraV.comm()); + serialV.set_serial(nbasis, nbasis); + serialV.set_atomic_trace(iat2iwt, nat, nbasis); + hamilt::HContainer hr_serial(&serialV); + hamilt::gatherParallels(*hr_vec[ispin], &hr_serial, 0); #else - hamilt::HContainer hr_serial(*hr_vec[ispin]); + hamilt::HContainer hr_serial(*hr_vec[ispin]); #endif - if (GlobalV::MY_RANK == 0) + if (GlobalV::MY_RANK == 0) + { + std::string fname = PARAM.globalv.global_out_dir + + hsr_gen_fname("hrs", ispin, append, istep); + write_hcontainer_csr(fname, ucell, precision, &hr_serial, istep, ispin, nspin, "H", binary); + } + } + else { std::string fname = PARAM.globalv.global_out_dir + hsr_gen_fname("hrs", ispin, append, istep); - write_hcontainer_csr(fname, ucell, precision, &hr_serial, istep, ispin, nspin, "H"); + if (GlobalV::NPROC > 1) + { + fname += "." + std::to_string(GlobalV::MY_RANK); + } + write_hcontainer_csr(fname, ucell, precision, hr_vec[ispin], istep, ispin, nspin, "H", binary); } } @@ -347,22 +464,35 @@ void ModuleIO::write_hsr(const std::vector*>& hr_vec, { const int nbasis = sr->get_nbasis(); + if (reduce) + { #ifdef __MPI - Parallel_Orbitals serialV; - serialV.init(nbasis, nbasis, nbasis, paraV.comm()); - serialV.set_serial(nbasis, nbasis); - serialV.set_atomic_trace(iat2iwt, nat, nbasis); - hamilt::HContainer sr_serial(&serialV); - hamilt::gatherParallels(*sr, &sr_serial, 0); + Parallel_Orbitals serialV; + serialV.init(nbasis, nbasis, nbasis, paraV.comm()); + serialV.set_serial(nbasis, nbasis); + serialV.set_atomic_trace(iat2iwt, nat, nbasis); + hamilt::HContainer sr_serial(&serialV); + hamilt::gatherParallels(*sr, &sr_serial, 0); #else - hamilt::HContainer sr_serial(*sr); + hamilt::HContainer sr_serial(*sr); #endif - if (GlobalV::MY_RANK == 0) + if (GlobalV::MY_RANK == 0) + { + std::string fname = PARAM.globalv.global_out_dir + + hsr_gen_fname("srs", 0, append, istep); + write_hcontainer_csr(fname, ucell, precision, &sr_serial, istep, 0, 1, "S", binary); + } + } + else { std::string fname = PARAM.globalv.global_out_dir + hsr_gen_fname("srs", 0, append, istep); - write_hcontainer_csr(fname, ucell, precision, &sr_serial, istep, 0, 1, "S"); + if (GlobalV::NPROC > 1) + { + fname += "." + std::to_string(GlobalV::MY_RANK); + } + write_hcontainer_csr(fname, ucell, precision, const_cast*>(sr), istep, 0, 1, "S", binary); } } } @@ -370,10 +500,10 @@ void ModuleIO::write_hsr(const std::vector*>& hr_vec, // Explicit instantiations template void ModuleIO::write_hcontainer_csr( const std::string&, const UnitCell*, const int, - hamilt::HContainer*, const int, const int, const int, const std::string&); + hamilt::HContainer*, const int, const int, const int, const std::string&, const bool&); template void ModuleIO::write_hcontainer_csr>( const std::string&, const UnitCell*, const int, - hamilt::HContainer>*, const int, const int, const int, const std::string&); + hamilt::HContainer>*, const int, const int, const int, const std::string&, const bool&); template void ModuleIO::write_hsr( const std::vector*>&, diff --git a/source/source_io/module_hs/write_HS_R.h b/source/source_io/module_hs/write_HS_R.h index 27c059f5875..b494a925776 100644 --- a/source/source_io/module_hs/write_HS_R.h +++ b/source/source_io/module_hs/write_HS_R.h @@ -14,6 +14,7 @@ namespace ModuleIO { + void output_dHR(const int& istep, const ModuleBase::matrix& v_eff, const UnitCell& ucell, @@ -24,7 +25,8 @@ void output_dHR(const int& istep, const LCAO_Orbitals& orb, const K_Vectors& kv, const bool& binary = false, - const double& sparse_threshold = 1e-10); + const double& sparse_threshold = 1e-10, + const bool& reduce = true); void output_dSR(const int& istep, const UnitCell& ucell, @@ -35,7 +37,8 @@ void output_dSR(const int& istep, const LCAO_Orbitals& orb, const K_Vectors& kv, const bool& binary = false, - const double& sparse_thr = 1e-10); + const double& sparse_thr = 1e-10, + const bool& reduce = true); void output_TR(const int istep, const UnitCell& ucell, @@ -46,7 +49,8 @@ void output_TR(const int istep, const LCAO_Orbitals& orb, const std::string& TR_filename = "trs1_nao.csr", const bool& binary = false, - const double& sparse_threshold = 1e-10); + const double& sparse_threshold = 1e-10, + const bool& reduce = true); template void output_SR(Parallel_Orbitals& pv, @@ -54,7 +58,8 @@ void output_SR(Parallel_Orbitals& pv, hamilt::Hamilt* p_ham, const std::string& SR_filename = "srs1_nao.csr", const bool& binary = false, - const double& sparse_threshold = 1e-10); + const double& sparse_threshold = 1e-10, + const bool& reduce = true); /// Generate filename for HR/SR CSR output. std::string hsr_gen_fname(const std::string& prefix, @@ -77,7 +82,8 @@ void write_hcontainer_csr(const std::string& fname, const int istep, const int ispin, const int nspin, - const std::string& label); + const std::string& label, + const bool& binary = false); /// Write H(R) and S(R) in CSR format, unified with write_dmr interface. template @@ -103,7 +109,6 @@ void write_matrix_r(const std::string& matrix_label, const int* iat2iwt, const int nat, const int istep); - } // namespace ModuleIO #endif diff --git a/source/source_io/module_hs/write_HS_sparse.cpp b/source/source_io/module_hs/write_HS_sparse.cpp index adcdd74951d..a279e1c4571 100644 --- a/source/source_io/module_hs/write_HS_sparse.cpp +++ b/source/source_io/module_hs/write_HS_sparse.cpp @@ -12,7 +12,8 @@ void ModuleIO::save_dH_sparse(const int& istep, LCAO_HS_Arrays& HS_Arrays, const double& sparse_thr, const bool& binary, - const std::string& fileflag) { + const std::string& fileflag, + const bool& reduce) { ModuleBase::TITLE("ModuleIO", "save_dH_sparse"); ModuleBase::timer::start("ModuleIO", "save_dH_sparse"); @@ -83,10 +84,12 @@ void ModuleIO::save_dH_sparse(const int& istep, count++; } - for (int ispin = 0; ispin < spin_loop; ++ispin) { - Parallel_Reduce::reduce_all(dHx_nonzero_num[ispin], total_R_num); - Parallel_Reduce::reduce_all(dHy_nonzero_num[ispin], total_R_num); - Parallel_Reduce::reduce_all(dHz_nonzero_num[ispin], total_R_num); + if (reduce) { + for (int ispin = 0; ispin < spin_loop; ++ispin) { + Parallel_Reduce::reduce_all(dHx_nonzero_num[ispin], total_R_num); + Parallel_Reduce::reduce_all(dHy_nonzero_num[ispin], total_R_num); + Parallel_Reduce::reduce_all(dHz_nonzero_num[ispin], total_R_num); + } } if (PARAM.inp.nspin == 2) @@ -142,11 +145,18 @@ void ModuleIO::save_dH_sparse(const int& istep, sshz[0] << PARAM.globalv.global_out_dir << "d"< 1) { + for (int ispin = 0; ispin < spin_loop; ++ispin) { + sshx[ispin] << "." << GlobalV::DRANK; + sshy[ispin] << "." << GlobalV::DRANK; + sshz[ispin] << "." << GlobalV::DRANK; + } + } std::ofstream g1x[2]; std::ofstream g1y[2]; std::ofstream g1z[2]; - if (GlobalV::DRANK == 0) + if (!reduce || GlobalV::DRANK == 0) { if (binary) // binary format { @@ -257,7 +267,7 @@ void ModuleIO::save_dH_sparse(const int& istep, output_R_coor_ptr.insert(R_coor); - if (GlobalV::DRANK == 0) { + if (!reduce || GlobalV::DRANK == 0) { if (binary) { for (int ispin = 0; ispin < spin_loop; ++ispin) { g1x[ispin].write(reinterpret_cast(&dRx), @@ -309,13 +319,15 @@ void ModuleIO::save_dH_sparse(const int& istep, dHRx_sparse_ptr[ispin][R_coor], sparse_thr, binary, - pv); + pv, + reduce); } else { output_single_R(g1x[ispin], dHRx_soc_sparse_ptr[R_coor], sparse_thr, binary, - pv); + pv, + reduce); } } if (dHy_nonzero_num[ispin][count] > 0) { @@ -324,13 +336,15 @@ void ModuleIO::save_dH_sparse(const int& istep, dHRy_sparse_ptr[ispin][R_coor], sparse_thr, binary, - pv); + pv, + reduce); } else { output_single_R(g1y[ispin], dHRy_soc_sparse_ptr[R_coor], sparse_thr, binary, - pv); + pv, + reduce); } } if (dHz_nonzero_num[ispin][count] > 0) { @@ -339,13 +353,15 @@ void ModuleIO::save_dH_sparse(const int& istep, dHRz_sparse_ptr[ispin][R_coor], sparse_thr, binary, - pv); + pv, + reduce); } else { output_single_R(g1z[ispin], dHRz_soc_sparse_ptr[R_coor], sparse_thr, binary, - pv); + pv, + reduce); } } } @@ -353,7 +369,7 @@ void ModuleIO::save_dH_sparse(const int& istep, count++; } - if (GlobalV::DRANK == 0) { + if (!reduce || GlobalV::DRANK == 0) { for (int ispin = 0; ispin < spin_loop; ++ispin) { g1x[ispin].close(); } diff --git a/source/source_io/module_hs/write_HS_sparse.h b/source/source_io/module_hs/write_HS_sparse.h index 8791a6bdafd..2f4bb7138fc 100644 --- a/source/source_io/module_hs/write_HS_sparse.h +++ b/source/source_io/module_hs/write_HS_sparse.h @@ -9,12 +9,14 @@ namespace ModuleIO { + void save_dH_sparse(const int& istep, const Parallel_Orbitals& pv, LCAO_HS_Arrays& HS_Arrays, const double& sparse_thr, const bool& binary, - const std::string& fileflag = "h"); + const std::string& fileflag = "h", + const bool& reduce = true); template void save_sparse(const std::map, std::map>>& smat, diff --git a/source/source_io/module_parameter/input_conv.cpp b/source/source_io/module_parameter/input_conv.cpp index 9d88945d88e..9252c69108f 100644 --- a/source/source_io/module_parameter/input_conv.cpp +++ b/source/source_io/module_parameter/input_conv.cpp @@ -61,7 +61,7 @@ std::vector Input_Conv::convert_units(std::string params, double c) { void Input_Conv::read_td_efield() { elecstate::H_TDDFT_pw::stype = PARAM.inp.td_stype; - if (PARAM.inp.out_mat_hs2[0] == 1) + if (PARAM.inp.out_mat_hs2[0] != 0) { TD_info::out_mat_R = true; } else { diff --git a/source/source_io/module_parameter/read_input_item_output.cpp b/source/source_io/module_parameter/read_input_item_output.cpp index 4881cb7d48a..ef2cb386b61 100644 --- a/source/source_io/module_parameter/read_input_item_output.cpp +++ b/source/source_io/module_parameter/read_input_item_output.cpp @@ -2,6 +2,7 @@ #include "source_base/tool_quit.h" #include "read_input.h" #include "read_input_tool.h" +#include "source_base/formatter.h" namespace ModuleIO { void ReadInput::item_output() @@ -504,6 +505,7 @@ Also controled by out_freq_ion and out_app_flag. item.category = "Output information"; item.type = R"(Boolean \[Integer\](optional))"; item.description = "Whether to print files containing the Hamiltonian matrix and overlap matrix into files in the directory OUT.${suffix}. For more information, please refer to hs_matrix.md." + "\n0: disabled\n1: single-file text CSR format\n2: single-file binary CSR format\n3: multi-file CSR text format\n4: multi-file binary CSR format" "\n\n[NOTE] In the 3.10-LTS version, the file names are data-HR-sparse_SPIN0.csr and data-SR-sparse_SPIN0.csr, etc."; item.default_value = "False [8]"; item.unit = "Ry"; @@ -511,7 +513,18 @@ Also controled by out_freq_ion and out_app_flag. item.read_value = [](const Input_Item& item, Parameter& para) { const size_t count = item.get_size(); if (count < 1) ModuleBase::WARNING_QUIT("ReadInput", "out_mat_hs2 needs at least 1 value"); - para.input.out_mat_hs2[0] = assume_as_boolean(item.str_values[0]); + std::string val_str = FmtCore::lower(item.str_values[0]); + if (val_str == "true" || val_str == "yes" || val_str == "y" || val_str == "on" || val_str == "t") { + para.input.out_mat_hs2[0] = 1; + } else if (val_str == "false" || val_str == "no" || val_str == "n" || val_str == "off" || val_str == "f") { + para.input.out_mat_hs2[0] = 0; + } else { + try { + para.input.out_mat_hs2[0] = std::stoi(item.str_values[0]); + } catch (...) { + para.input.out_mat_hs2[0] = 0; + } + } para.input.out_mat_hs2[1] = 8; if (count >= 2) try { para.input.out_mat_hs2[1] = std::stoi(item.str_values[1]); } catch (const std::invalid_argument&) { /* do nothing */ } From 5cb01242972f54a316119555210d14254b80c3c3 Mon Sep 17 00:00:00 2001 From: haochong zhang Date: Wed, 20 May 2026 09:04:48 +0000 Subject: [PATCH 2/3] fix(io): include sparse_matrix.h in write_HS_R.cpp --- source/source_io/module_hs/write_HS_R.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/source/source_io/module_hs/write_HS_R.cpp b/source/source_io/module_hs/write_HS_R.cpp index f6c7db7f17d..5f6cc238551 100644 --- a/source/source_io/module_hs/write_HS_R.cpp +++ b/source/source_io/module_hs/write_HS_R.cpp @@ -7,6 +7,7 @@ #include "source_lcao/spar_hsr.h" #include "source_lcao/spar_st.h" #include "write_HS_sparse.h" +#include "source_io/module_output/sparse_matrix.h" namespace { // Helper: Convert sparse map to HContainer From 7b44e925903f8a75cd890cbca127c1cf87ed803a Mon Sep 17 00:00:00 2001 From: haochong zhang Date: Wed, 20 May 2026 09:07:47 +0000 Subject: [PATCH 3/3] fix(io): fix null pointer dereference in save_sparse binary mode --- source/source_io/module_hs/write_HS_sparse.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/source/source_io/module_hs/write_HS_sparse.cpp b/source/source_io/module_hs/write_HS_sparse.cpp index a279e1c4571..72e6bdb4db3 100644 --- a/source/source_io/module_hs/write_HS_sparse.cpp +++ b/source/source_io/module_hs/write_HS_sparse.cpp @@ -444,9 +444,10 @@ void ModuleIO::save_sparse( } else { ofs.open(sss.str().c_str(), std::ios::binary); } - ofs.write(reinterpret_cast(0), sizeof(int)); - ofs.write(reinterpret_cast(&nlocal), sizeof(int)); - ofs.write(reinterpret_cast(&output_R_number), sizeof(int)); + int step = std::max(istep, 0); + ofs.write(reinterpret_cast(&step), sizeof(int)); + ofs.write(reinterpret_cast(&nlocal), sizeof(int)); + ofs.write(reinterpret_cast(&output_R_number), sizeof(int)); } else { if (PARAM.inp.calculation == "md" && PARAM.inp.out_app_flag && istep) {