From f49acc65db6923ddd2249f4350ff0c76f1468186 Mon Sep 17 00:00:00 2001 From: linpz Date: Sat, 16 May 2026 02:03:01 +0800 Subject: [PATCH 1/4] Refactor: split RI_2D_Comm::split_m2D_ktoR() --- source/source_lcao/module_ri/RI_2D_Comm.h | 26 +++++-- source/source_lcao/module_ri/RI_2D_Comm.hpp | 86 +++++++++++++++++++-- 2 files changed, 102 insertions(+), 10 deletions(-) diff --git a/source/source_lcao/module_ri/RI_2D_Comm.h b/source/source_lcao/module_ri/RI_2D_Comm.h index a68c6bd8fd4..a46d524700f 100644 --- a/source/source_lcao/module_ri/RI_2D_Comm.h +++ b/source/source_lcao/module_ri/RI_2D_Comm.h @@ -32,10 +32,26 @@ using TAC = std::pair; template extern std::vector>>> split_m2D_ktoR( const UnitCell& ucell, - const K_Vectors& kv, - const std::vector& mks_2D, - const Parallel_2D& pv, - const int nspin, + const K_Vectors& kv, + const std::vector& mks_2D, + const Parallel_2D& pv, + const int nspin, + const bool spgsym = false); + +template +extern std::vector>>> split_m2D_ktoR_gamma( + const UnitCell& ucell, + const std::vector& mks_2D, + const Parallel_2D& pv, + const int nspin); + +template +extern std::vector>>> split_m2D_ktoR_general( + const UnitCell& ucell, + const K_Vectors& kv, + const std::vector& mks_2D, + const Parallel_2D& pv, + const int nspin, const bool spgsym = false); // judge[is] = {s0, s1} @@ -119,4 +135,4 @@ extern std::vector>>> split_m2D_kto #include "RI_2D_Comm.hpp" -#endif \ No newline at end of file +#endif diff --git a/source/source_lcao/module_ri/RI_2D_Comm.hpp b/source/source_lcao/module_ri/RI_2D_Comm.hpp index 4a1b37d8123..4b7d1c9d872 100644 --- a/source/source_lcao/module_ri/RI_2D_Comm.hpp +++ b/source/source_lcao/module_ri/RI_2D_Comm.hpp @@ -33,15 +33,91 @@ inline RI::Tensor> tensor_conj(const RI::Tensor auto RI_2D_Comm::split_m2D_ktoR(const UnitCell& ucell, - const K_Vectors & kv, - const std::vector&mks_2D, - const Parallel_2D & pv, - const int nspin, + const K_Vectors & kv, + const std::vector&mks_2D, + const Parallel_2D & pv, + const int nspin, const bool spgsym) -> std::vector>>> { ModuleBase::TITLE("RI_2D_Comm","split_m2D_ktoR"); ModuleBase::timer::start("RI_2D_Comm", "split_m2D_ktoR"); + const TC period = RI_Util::get_Born_vonKarmen_period(kv); + std::vector>>> mRs_a2D + = (period == TC{1, 1, 1}) + ? RI_2D_Comm::split_m2D_ktoR_gamma(ucell, mks_2D, pv, nspin) + : RI_2D_Comm::split_m2D_ktoR_general(ucell, kv, mks_2D, pv, nspin, spgsym); + ModuleBase::timer::end("RI_2D_Comm", "split_m2D_ktoR"); + return mRs_a2D; +} + +template +auto RI_2D_Comm::split_m2D_ktoR_gamma(const UnitCell& ucell, + const std::vector& mks_2D, + const Parallel_2D& pv, + const int nspin) +-> std::vector>>> +{ + ModuleBase::TITLE("RI_2D_Comm","split_m2D_ktoR_gamma"); + ModuleBase::timer::start("RI_2D_Comm", "split_m2D_ktoR_gamma"); + + const std::map nspin_k = {{1,1}, {2,2}, {4,1}}; + const double SPIN_multiple = std::map{ {1,0.5}, {2,1}, {4,1} }.at(nspin); // why? + const TC cell = {0, 0, 0}; + + std::vector>>> mRs_a2D(nspin); + for (int is_k = 0; is_k < nspin_k.at(nspin); ++is_k) + { + using Tdata_m = typename Tmatrix::value_type; + RI::Tensor mk_2D + = RI_Util::Vector_to_Tensor(*mks_2D[is_k], pv.get_col_size(), pv.get_row_size()); + const Tdata_m frac = RI::Global_Func::convert(SPIN_multiple); + RI::Tensor mR_2D = RI::Global_Func::convert(mk_2D * frac); + + for (int iwt0_2D = 0; iwt0_2D != mR_2D.shape[0]; ++iwt0_2D) + { + const int iwt0 = ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) + ? pv.local2global_col(iwt0_2D) + : pv.local2global_row(iwt0_2D); + int iat0, iw0_b, is0_b; + std::tie(iat0, iw0_b, is0_b) = RI_2D_Comm::get_iat_iw_is_block(ucell, iwt0); + const int it0 = ucell.iat2it[iat0]; + for (int iwt1_2D = 0; iwt1_2D != mR_2D.shape[1]; ++iwt1_2D) + { + const int iwt1 = ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) + ? pv.local2global_row(iwt1_2D) + : pv.local2global_col(iwt1_2D); + int iat1, iw1_b, is1_b; + std::tie(iat1, iw1_b, is1_b) = RI_2D_Comm::get_iat_iw_is_block(ucell, iwt1); + const int it1 = ucell.iat2it[iat1]; + + const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b); + RI::Tensor& mR_a2D = mRs_a2D[is_b][iat0][{iat1, cell}]; + if (mR_a2D.empty()) + { + mR_a2D = RI::Tensor( + {static_cast(ucell.atoms[it0].nw), + static_cast(ucell.atoms[it1].nw)}); + } + mR_a2D(iw0_b, iw1_b) = mR_2D(iwt0_2D, iwt1_2D); + } + } + } + ModuleBase::timer::end("RI_2D_Comm", "split_m2D_ktoR_gamma"); + return mRs_a2D; +} + +template +auto RI_2D_Comm::split_m2D_ktoR_general(const UnitCell& ucell, + const K_Vectors& kv, + const std::vector& mks_2D, + const Parallel_2D& pv, + const int nspin, + const bool spgsym) +-> std::vector>>> +{ + ModuleBase::TITLE("RI_2D_Comm","split_m2D_ktoR_general"); + ModuleBase::timer::start("RI_2D_Comm", "split_m2D_ktoR_general"); const TC period = RI_Util::get_Born_vonKarmen_period(kv); const std::map nspin_k = {{1,1}, {2,2}, {4,1}}; @@ -119,7 +195,7 @@ auto RI_2D_Comm::split_m2D_ktoR(const UnitCell& ucell, } } } - ModuleBase::timer::end("RI_2D_Comm", "split_m2D_ktoR"); + ModuleBase::timer::end("RI_2D_Comm", "split_m2D_ktoR_general"); return mRs_a2D; } From b1cac41393497930bbe029feee56c3cf75e6dc36 Mon Sep 17 00:00:00 2001 From: linpz Date: Sat, 16 May 2026 16:54:43 +0800 Subject: [PATCH 2/4] Feature: add OpenMP in RI_2D_Comm::split_m2D_ktoR() --- source/source_lcao/module_ri/RI_2D_Comm.hpp | 64 +++++++++++++-------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/source/source_lcao/module_ri/RI_2D_Comm.hpp b/source/source_lcao/module_ri/RI_2D_Comm.hpp index 4b7d1c9d872..c0128af8950 100644 --- a/source/source_lcao/module_ri/RI_2D_Comm.hpp +++ b/source/source_lcao/module_ri/RI_2D_Comm.hpp @@ -74,6 +74,9 @@ auto RI_2D_Comm::split_m2D_ktoR_gamma(const UnitCell& ucell, const Tdata_m frac = RI::Global_Func::convert(SPIN_multiple); RI::Tensor mR_2D = RI::Global_Func::convert(mk_2D * frac); + #ifdef _OPENMP + #pragma omp parallel for schedule(dynamic) + #endif for (int iwt0_2D = 0; iwt0_2D != mR_2D.shape[0]; ++iwt0_2D) { const int iwt0 = ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) @@ -92,14 +95,19 @@ auto RI_2D_Comm::split_m2D_ktoR_gamma(const UnitCell& ucell, const int it1 = ucell.iat2it[iat1]; const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b); - RI::Tensor& mR_a2D = mRs_a2D[is_b][iat0][{iat1, cell}]; - if (mR_a2D.empty()) + #ifdef _OPENMP + #pragma omp critical(RI_split_m2D_ktoR_gamma) + #endif { - mR_a2D = RI::Tensor( - {static_cast(ucell.atoms[it0].nw), - static_cast(ucell.atoms[it1].nw)}); + RI::Tensor& mR_a2D = mRs_a2D[is_b][iat0][{iat1, cell}]; + if (mR_a2D.empty()) + { + mR_a2D = RI::Tensor( + {static_cast(ucell.atoms[it0].nw), + static_cast(ucell.atoms[it1].nw)}); + } + mR_a2D(iw0_b, iw1_b) = mR_2D(iwt0_2D, iwt1_2D); } - mR_a2D(iw0_b, iw1_b) = mR_2D(iwt0_2D, iwt1_2D); } } } @@ -127,19 +135,23 @@ auto RI_2D_Comm::split_m2D_ktoR_general(const UnitCell& ucell, for (int is_k = 0; is_k < nspin_k.at(nspin); ++is_k) { const std::vector ik_list = RI_2D_Comm::get_ik_list(kv, is_k); - for(const TC &cell : RI_Util::get_Born_von_Karmen_cells(period)) - { + const auto cells = RI_Util::get_Born_von_Karmen_cells(period); + #ifdef _OPENMP + #pragma omp parallel for schedule(dynamic) + #endif + for (size_t icell = 0; icell < cells.size(); ++icell) + { + const TC& cell = cells[icell]; RI::Tensor mR_2D; int ik_full = 0; for (const int ik : ik_list) { - auto set_mR_2D = [&mR_2D](auto&& mk_frac) { - if (mR_2D.empty()) { - mR_2D = RI::Global_Func::convert(mk_frac); - } else { - mR_2D - = mR_2D + RI::Global_Func::convert(mk_frac); - } + auto set_mR_2D = [&mR_2D](auto&& mk_frac) + { + if (mR_2D.empty()) + { mR_2D = RI::Global_Func::convert(mk_frac); } + else + { mR_2D = mR_2D + RI::Global_Func::convert(mk_frac); } }; using Tdata_m = typename Tmatrix::value_type; if (!spgsym) @@ -150,7 +162,8 @@ auto RI_2D_Comm::split_m2D_ktoR_general(const UnitCell& ucell, -ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell) * ucell.latvec)))); if (static_cast(std::round(SPIN_multiple * kv.wk[ik] * kv.get_nkstot_full())) == 2) { set_mR_2D(mk_2D * (frac * 0.5) + tensor_conj(mk_2D * (frac * 0.5))); } - else { set_mR_2D(mk_2D * frac); } + else + { set_mR_2D(mk_2D * frac); } } else { // traverse kstar, ik means ik_ibz @@ -183,14 +196,19 @@ auto RI_2D_Comm::split_m2D_ktoR_general(const UnitCell& ucell, const int it1 = ucell.iat2it[iat1]; const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b); - RI::Tensor &mR_a2D = mRs_a2D[is_b][iat0][{iat1,cell}]; - if (mR_a2D.empty()) { - mR_a2D = RI::Tensor( - {static_cast(ucell.atoms[it0].nw), - static_cast( - ucell.atoms[it1].nw)}); + #ifdef _OPENMP + #pragma omp critical(RI_split_m2D_ktoR_general) + #endif + { + RI::Tensor& mR_a2D = mRs_a2D[is_b][iat0][{iat1, cell}]; + if (mR_a2D.empty()) { + mR_a2D = RI::Tensor( + {static_cast(ucell.atoms[it0].nw), + static_cast( + ucell.atoms[it1].nw)}); + } + mR_a2D(iw0_b, iw1_b) = mR_2D(iwt0_2D, iwt1_2D); } - mR_a2D(iw0_b,iw1_b) = mR_2D(iwt0_2D, iwt1_2D); } } } From 85ebe5070e62a995b4a56e1d57eee771f0c98774 Mon Sep 17 00:00:00 2001 From: linpz Date: Sat, 16 May 2026 18:41:26 +0800 Subject: [PATCH 3/4] Feature: update OpenMP in RI_2D_Comm::split_m2D_ktoR_gamma() --- source/source_lcao/module_ri/RI_2D_Comm.hpp | 61 ++++++++++++++++----- 1 file changed, 47 insertions(+), 14 deletions(-) diff --git a/source/source_lcao/module_ri/RI_2D_Comm.hpp b/source/source_lcao/module_ri/RI_2D_Comm.hpp index c0128af8950..9b6108d9586 100644 --- a/source/source_lcao/module_ri/RI_2D_Comm.hpp +++ b/source/source_lcao/module_ri/RI_2D_Comm.hpp @@ -22,6 +22,10 @@ #include #include +#ifdef _OPENMP +#include +#endif + inline RI::Tensor tensor_conj(const RI::Tensor& t) { return t; } inline RI::Tensor> tensor_conj(const RI::Tensor>& t) { @@ -66,6 +70,18 @@ auto RI_2D_Comm::split_m2D_ktoR_gamma(const UnitCell& ucell, const TC cell = {0, 0, 0}; std::vector>>> mRs_a2D(nspin); + + #ifdef _OPENMP + // pre-init all outer maps mRs_a2D[is_b][iat] to avoid concurrent std::map rebalancing + for (int is_b = 0; is_b < nspin; ++is_b) + for (int iat0 = 0; iat0 < ucell.nat; ++iat0) + mRs_a2D[is_b][iat0]; + + std::vector locks(ucell.nat); + for (auto& l : locks) + omp_init_lock(&l); + #endif + for (int is_k = 0; is_k < nspin_k.at(nspin); ++is_k) { using Tdata_m = typename Tmatrix::value_type; @@ -95,22 +111,39 @@ auto RI_2D_Comm::split_m2D_ktoR_gamma(const UnitCell& ucell, const int it1 = ucell.iat2it[iat1]; const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b); - #ifdef _OPENMP - #pragma omp critical(RI_split_m2D_ktoR_gamma) - #endif + #ifdef _OPENMP + omp_set_lock(&locks[iat0]); + #endif + RI::Tensor& mR_a2D = mRs_a2D[is_b][iat0][{iat1, cell}]; + if (mR_a2D.empty()) { - RI::Tensor& mR_a2D = mRs_a2D[is_b][iat0][{iat1, cell}]; - if (mR_a2D.empty()) - { - mR_a2D = RI::Tensor( - {static_cast(ucell.atoms[it0].nw), - static_cast(ucell.atoms[it1].nw)}); - } - mR_a2D(iw0_b, iw1_b) = mR_2D(iwt0_2D, iwt1_2D); + mR_a2D = RI::Tensor( + {static_cast(ucell.atoms[it0].nw), + static_cast(ucell.atoms[it1].nw)}); } + mR_a2D(iw0_b, iw1_b) = mR_2D(iwt0_2D, iwt1_2D); + #ifdef _OPENMP + omp_unset_lock(&locks[iat0]); + #endif } } } + + #ifdef _OPENMP + for (auto& l : locks) + omp_destroy_lock(&l); + + // prune empty inner maps created by pre-init + for (int is_b = 0; is_b < nspin; ++is_b) + for (auto it = mRs_a2D[is_b].begin(); it != mRs_a2D[is_b].end();) + { + if (it->second.empty()) + it = mRs_a2D[is_b].erase(it); + else + ++it; + } + #endif + ModuleBase::timer::end("RI_2D_Comm", "split_m2D_ktoR_gamma"); return mRs_a2D; } @@ -201,11 +234,11 @@ auto RI_2D_Comm::split_m2D_ktoR_general(const UnitCell& ucell, #endif { RI::Tensor& mR_a2D = mRs_a2D[is_b][iat0][{iat1, cell}]; - if (mR_a2D.empty()) { + if (mR_a2D.empty()) + { mR_a2D = RI::Tensor( {static_cast(ucell.atoms[it0].nw), - static_cast( - ucell.atoms[it1].nw)}); + static_cast(ucell.atoms[it1].nw)}); } mR_a2D(iw0_b, iw1_b) = mR_2D(iwt0_2D, iwt1_2D); } From 95d8a619c3123a60442f468464d591c3e4bdeb73 Mon Sep 17 00:00:00 2001 From: linpz Date: Sat, 16 May 2026 19:51:53 +0800 Subject: [PATCH 4/4] Feature: update OpenMP in RI_2D_Comm::split_m2D_ktoR_k() --- source/source_lcao/module_ri/RI_2D_Comm.h | 2 +- source/source_lcao/module_ri/RI_2D_Comm.hpp | 150 +++++++++++--------- 2 files changed, 82 insertions(+), 70 deletions(-) diff --git a/source/source_lcao/module_ri/RI_2D_Comm.h b/source/source_lcao/module_ri/RI_2D_Comm.h index a46d524700f..88b0e3421cc 100644 --- a/source/source_lcao/module_ri/RI_2D_Comm.h +++ b/source/source_lcao/module_ri/RI_2D_Comm.h @@ -46,7 +46,7 @@ extern std::vector>>> split_m2D_kto const int nspin); template -extern std::vector>>> split_m2D_ktoR_general( +extern std::vector>>> split_m2D_ktoR_k( const UnitCell& ucell, const K_Vectors& kv, const std::vector& mks_2D, diff --git a/source/source_lcao/module_ri/RI_2D_Comm.hpp b/source/source_lcao/module_ri/RI_2D_Comm.hpp index 9b6108d9586..7df8d8bb5ac 100644 --- a/source/source_lcao/module_ri/RI_2D_Comm.hpp +++ b/source/source_lcao/module_ri/RI_2D_Comm.hpp @@ -50,7 +50,7 @@ auto RI_2D_Comm::split_m2D_ktoR(const UnitCell& ucell, std::vector>>> mRs_a2D = (period == TC{1, 1, 1}) ? RI_2D_Comm::split_m2D_ktoR_gamma(ucell, mks_2D, pv, nspin) - : RI_2D_Comm::split_m2D_ktoR_general(ucell, kv, mks_2D, pv, nspin, spgsym); + : RI_2D_Comm::split_m2D_ktoR_k(ucell, kv, mks_2D, pv, nspin, spgsym); ModuleBase::timer::end("RI_2D_Comm", "split_m2D_ktoR"); return mRs_a2D; } @@ -149,7 +149,7 @@ auto RI_2D_Comm::split_m2D_ktoR_gamma(const UnitCell& ucell, } template -auto RI_2D_Comm::split_m2D_ktoR_general(const UnitCell& ucell, +auto RI_2D_Comm::split_m2D_ktoR_k(const UnitCell& ucell, const K_Vectors& kv, const std::vector& mks_2D, const Parallel_2D& pv, @@ -157,83 +157,82 @@ auto RI_2D_Comm::split_m2D_ktoR_general(const UnitCell& ucell, const bool spgsym) -> std::vector>>> { - ModuleBase::TITLE("RI_2D_Comm","split_m2D_ktoR_general"); - ModuleBase::timer::start("RI_2D_Comm", "split_m2D_ktoR_general"); + ModuleBase::TITLE("RI_2D_Comm","split_m2D_ktoR_k"); + ModuleBase::timer::start("RI_2D_Comm", "split_m2D_ktoR_k"); const TC period = RI_Util::get_Born_vonKarmen_period(kv); const std::map nspin_k = {{1,1}, {2,2}, {4,1}}; const double SPIN_multiple = std::map{ {1,0.5}, {2,1}, {4,1} }.at(nspin); // why? std::vector>>> mRs_a2D(nspin); - for (int is_k = 0; is_k < nspin_k.at(nspin); ++is_k) - { - const std::vector ik_list = RI_2D_Comm::get_ik_list(kv, is_k); - const auto cells = RI_Util::get_Born_von_Karmen_cells(period); - #ifdef _OPENMP - #pragma omp parallel for schedule(dynamic) - #endif - for (size_t icell = 0; icell < cells.size(); ++icell) + #ifdef _OPENMP + #pragma omp parallel + #endif + { + std::vector>>> mRs_a2D_thread(nspin); + for (int is_k = 0; is_k < nspin_k.at(nspin); ++is_k) { - const TC& cell = cells[icell]; - RI::Tensor mR_2D; - int ik_full = 0; - for (const int ik : ik_list) + const std::vector ik_list = RI_2D_Comm::get_ik_list(kv, is_k); + const auto cells = RI_Util::get_Born_von_Karmen_cells(period); + #pragma omp for schedule(dynamic) + for (size_t icell = 0; icell < cells.size(); ++icell) { - auto set_mR_2D = [&mR_2D](auto&& mk_frac) + const TC& cell = cells[icell]; + RI::Tensor mR_2D; + int ik_full = 0; + for (const int ik : ik_list) { - if (mR_2D.empty()) - { mR_2D = RI::Global_Func::convert(mk_frac); } - else - { mR_2D = mR_2D + RI::Global_Func::convert(mk_frac); } - }; - using Tdata_m = typename Tmatrix::value_type; - if (!spgsym) - { - RI::Tensor mk_2D = RI_Util::Vector_to_Tensor(*mks_2D[ik], pv.get_col_size(), pv.get_row_size()); - const Tdata_m frac = SPIN_multiple - * RI::Global_Func::convert(std::exp( - -ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell) * ucell.latvec)))); - if (static_cast(std::round(SPIN_multiple * kv.wk[ik] * kv.get_nkstot_full())) == 2) - { set_mR_2D(mk_2D * (frac * 0.5) + tensor_conj(mk_2D * (frac * 0.5))); } - else - { set_mR_2D(mk_2D * frac); } - } - else - { // traverse kstar, ik means ik_ibz - for (auto& isym_kvd : kv.kstars[ik % ik_list.size()]) + auto set_mR_2D = [&mR_2D](auto&& mk_frac) + { + if (mR_2D.empty()) + { mR_2D = RI::Global_Func::convert(mk_frac); } + else + { mR_2D = mR_2D + RI::Global_Func::convert(mk_frac); } + }; + using Tdata_m = typename Tmatrix::value_type; + if (!spgsym) { - RI::Tensor mk_2D = RI_Util::Vector_to_Tensor(*mks_2D[ik_full + is_k * kv.get_nkstot_full()], pv.get_col_size(), pv.get_row_size()); + RI::Tensor mk_2D = RI_Util::Vector_to_Tensor(*mks_2D[ik], pv.get_col_size(), pv.get_row_size()); const Tdata_m frac = SPIN_multiple * RI::Global_Func::convert(std::exp( - -ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * ((isym_kvd.second * ucell.G) * (RI_Util::array3_to_Vector3(cell) * ucell.latvec)))); - set_mR_2D(mk_2D * frac); - ++ik_full; + -ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell) * ucell.latvec)))); + if (static_cast(std::round(SPIN_multiple * kv.wk[ik] * kv.get_nkstot_full())) == 2) + { set_mR_2D(mk_2D * (frac * 0.5) + tensor_conj(mk_2D * (frac * 0.5))); } + else + { set_mR_2D(mk_2D * frac); } } - } - } - for(int iwt0_2D=0; iwt0_2D!=mR_2D.shape[0]; ++iwt0_2D) - { - const int iwt0 =ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) - ? pv.local2global_col(iwt0_2D) - : pv.local2global_row(iwt0_2D); - int iat0, iw0_b, is0_b; - std::tie(iat0,iw0_b,is0_b) = RI_2D_Comm::get_iat_iw_is_block(ucell,iwt0); - const int it0 = ucell.iat2it[iat0]; - for(int iwt1_2D=0; iwt1_2D!=mR_2D.shape[1]; ++iwt1_2D) - { - const int iwt1 =ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) - ? pv.local2global_row(iwt1_2D) - : pv.local2global_col(iwt1_2D); - int iat1, iw1_b, is1_b; - std::tie(iat1,iw1_b,is1_b) = RI_2D_Comm::get_iat_iw_is_block(ucell,iwt1); - const int it1 = ucell.iat2it[iat1]; - - const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b); - #ifdef _OPENMP - #pragma omp critical(RI_split_m2D_ktoR_general) - #endif + else + { // traverse kstar, ik means ik_ibz + for (auto& isym_kvd : kv.kstars[ik % ik_list.size()]) + { + RI::Tensor mk_2D = RI_Util::Vector_to_Tensor(*mks_2D[ik_full + is_k * kv.get_nkstot_full()], pv.get_col_size(), pv.get_row_size()); + const Tdata_m frac = SPIN_multiple + * RI::Global_Func::convert(std::exp( + -ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * ((isym_kvd.second * ucell.G) * (RI_Util::array3_to_Vector3(cell) * ucell.latvec)))); + set_mR_2D(mk_2D * frac); + ++ik_full; + } + } + } // end for ik + for(int iwt0_2D=0; iwt0_2D!=mR_2D.shape[0]; ++iwt0_2D) + { + const int iwt0 =ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) + ? pv.local2global_col(iwt0_2D) + : pv.local2global_row(iwt0_2D); + int iat0, iw0_b, is0_b; + std::tie(iat0,iw0_b,is0_b) = RI_2D_Comm::get_iat_iw_is_block(ucell,iwt0); + const int it0 = ucell.iat2it[iat0]; + for(int iwt1_2D=0; iwt1_2D!=mR_2D.shape[1]; ++iwt1_2D) { - RI::Tensor& mR_a2D = mRs_a2D[is_b][iat0][{iat1, cell}]; + const int iwt1 =ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) + ? pv.local2global_row(iwt1_2D) + : pv.local2global_col(iwt1_2D); + int iat1, iw1_b, is1_b; + std::tie(iat1,iw1_b,is1_b) = RI_2D_Comm::get_iat_iw_is_block(ucell,iwt1); + const int it1 = ucell.iat2it[iat1]; + + const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b); + RI::Tensor& mR_a2D = mRs_a2D_thread[is_b][iat0][{iat1, cell}]; if (mR_a2D.empty()) { mR_a2D = RI::Tensor( @@ -241,12 +240,25 @@ auto RI_2D_Comm::split_m2D_ktoR_general(const UnitCell& ucell, static_cast(ucell.atoms[it1].nw)}); } mR_a2D(iw0_b, iw1_b) = mR_2D(iwt0_2D, iwt1_2D); + } // for iwt1_2D + } // end for iwt0_2D + } // end for icell + } // end for is_k + + #ifdef _OPENMP + #pragma omp critical + #endif + { + for(int is=0; is