diff --git a/source/source_lcao/module_ri/RI_2D_Comm.h b/source/source_lcao/module_ri/RI_2D_Comm.h index a68c6bd8fd..88b0e3421c 100644 --- a/source/source_lcao/module_ri/RI_2D_Comm.h +++ b/source/source_lcao/module_ri/RI_2D_Comm.h @@ -32,10 +32,26 @@ using TAC = std::pair; template extern std::vector>>> split_m2D_ktoR( const UnitCell& ucell, - const K_Vectors& kv, - const std::vector& mks_2D, - const Parallel_2D& pv, - const int nspin, + const K_Vectors& kv, + const std::vector& mks_2D, + const Parallel_2D& pv, + const int nspin, + const bool spgsym = false); + +template +extern std::vector>>> split_m2D_ktoR_gamma( + const UnitCell& ucell, + const std::vector& mks_2D, + const Parallel_2D& pv, + const int nspin); + +template +extern std::vector>>> split_m2D_ktoR_k( + const UnitCell& ucell, + const K_Vectors& kv, + const std::vector& mks_2D, + const Parallel_2D& pv, + const int nspin, const bool spgsym = false); // judge[is] = {s0, s1} @@ -119,4 +135,4 @@ extern std::vector>>> split_m2D_kto #include "RI_2D_Comm.hpp" -#endif \ No newline at end of file +#endif diff --git a/source/source_lcao/module_ri/RI_2D_Comm.hpp b/source/source_lcao/module_ri/RI_2D_Comm.hpp index 4a1b37d812..7df8d8bb5a 100644 --- a/source/source_lcao/module_ri/RI_2D_Comm.hpp +++ b/source/source_lcao/module_ri/RI_2D_Comm.hpp @@ -22,6 +22,10 @@ #include #include +#ifdef _OPENMP +#include +#endif + inline RI::Tensor tensor_conj(const RI::Tensor& t) { return t; } inline RI::Tensor> tensor_conj(const RI::Tensor>& t) { @@ -33,93 +37,228 @@ inline RI::Tensor> tensor_conj(const RI::Tensor auto RI_2D_Comm::split_m2D_ktoR(const UnitCell& ucell, - const K_Vectors & kv, - const std::vector&mks_2D, - const Parallel_2D & pv, - const int nspin, + const K_Vectors & kv, + const std::vector&mks_2D, + const Parallel_2D & pv, + const int nspin, const bool spgsym) -> std::vector>>> { ModuleBase::TITLE("RI_2D_Comm","split_m2D_ktoR"); ModuleBase::timer::start("RI_2D_Comm", "split_m2D_ktoR"); - const TC period = RI_Util::get_Born_vonKarmen_period(kv); + std::vector>>> mRs_a2D + = (period == TC{1, 1, 1}) + ? RI_2D_Comm::split_m2D_ktoR_gamma(ucell, mks_2D, pv, nspin) + : RI_2D_Comm::split_m2D_ktoR_k(ucell, kv, mks_2D, pv, nspin, spgsym); + ModuleBase::timer::end("RI_2D_Comm", "split_m2D_ktoR"); + return mRs_a2D; +} + +template +auto RI_2D_Comm::split_m2D_ktoR_gamma(const UnitCell& ucell, + const std::vector& mks_2D, + const Parallel_2D& pv, + const int nspin) +-> std::vector>>> +{ + ModuleBase::TITLE("RI_2D_Comm","split_m2D_ktoR_gamma"); + ModuleBase::timer::start("RI_2D_Comm", "split_m2D_ktoR_gamma"); + const std::map nspin_k = {{1,1}, {2,2}, {4,1}}; const double SPIN_multiple = std::map{ {1,0.5}, {2,1}, {4,1} }.at(nspin); // why? + const TC cell = {0, 0, 0}; std::vector>>> mRs_a2D(nspin); + + #ifdef _OPENMP + // pre-init all outer maps mRs_a2D[is_b][iat] to avoid concurrent std::map rebalancing + for (int is_b = 0; is_b < nspin; ++is_b) + for (int iat0 = 0; iat0 < ucell.nat; ++iat0) + mRs_a2D[is_b][iat0]; + + std::vector locks(ucell.nat); + for (auto& l : locks) + omp_init_lock(&l); + #endif + for (int is_k = 0; is_k < nspin_k.at(nspin); ++is_k) - { - const std::vector ik_list = RI_2D_Comm::get_ik_list(kv, is_k); - for(const TC &cell : RI_Util::get_Born_von_Karmen_cells(period)) - { - RI::Tensor mR_2D; - int ik_full = 0; - for (const int ik : ik_list) + { + using Tdata_m = typename Tmatrix::value_type; + RI::Tensor mk_2D + = RI_Util::Vector_to_Tensor(*mks_2D[is_k], pv.get_col_size(), pv.get_row_size()); + const Tdata_m frac = RI::Global_Func::convert(SPIN_multiple); + RI::Tensor mR_2D = RI::Global_Func::convert(mk_2D * frac); + + #ifdef _OPENMP + #pragma omp parallel for schedule(dynamic) + #endif + for (int iwt0_2D = 0; iwt0_2D != mR_2D.shape[0]; ++iwt0_2D) + { + const int iwt0 = ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) + ? pv.local2global_col(iwt0_2D) + : pv.local2global_row(iwt0_2D); + int iat0, iw0_b, is0_b; + std::tie(iat0, iw0_b, is0_b) = RI_2D_Comm::get_iat_iw_is_block(ucell, iwt0); + const int it0 = ucell.iat2it[iat0]; + for (int iwt1_2D = 0; iwt1_2D != mR_2D.shape[1]; ++iwt1_2D) { - auto set_mR_2D = [&mR_2D](auto&& mk_frac) { - if (mR_2D.empty()) { - mR_2D = RI::Global_Func::convert(mk_frac); - } else { - mR_2D - = mR_2D + RI::Global_Func::convert(mk_frac); - } - }; - using Tdata_m = typename Tmatrix::value_type; - if (!spgsym) + const int iwt1 = ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) + ? pv.local2global_row(iwt1_2D) + : pv.local2global_col(iwt1_2D); + int iat1, iw1_b, is1_b; + std::tie(iat1, iw1_b, is1_b) = RI_2D_Comm::get_iat_iw_is_block(ucell, iwt1); + const int it1 = ucell.iat2it[iat1]; + + const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b); + #ifdef _OPENMP + omp_set_lock(&locks[iat0]); + #endif + RI::Tensor& mR_a2D = mRs_a2D[is_b][iat0][{iat1, cell}]; + if (mR_a2D.empty()) { - RI::Tensor mk_2D = RI_Util::Vector_to_Tensor(*mks_2D[ik], pv.get_col_size(), pv.get_row_size()); - const Tdata_m frac = SPIN_multiple - * RI::Global_Func::convert(std::exp( - -ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell) * ucell.latvec)))); - if (static_cast(std::round(SPIN_multiple * kv.wk[ik] * kv.get_nkstot_full())) == 2) - { set_mR_2D(mk_2D * (frac * 0.5) + tensor_conj(mk_2D * (frac * 0.5))); } - else { set_mR_2D(mk_2D * frac); } + mR_a2D = RI::Tensor( + {static_cast(ucell.atoms[it0].nw), + static_cast(ucell.atoms[it1].nw)}); } + mR_a2D(iw0_b, iw1_b) = mR_2D(iwt0_2D, iwt1_2D); + #ifdef _OPENMP + omp_unset_lock(&locks[iat0]); + #endif + } + } + } + + #ifdef _OPENMP + for (auto& l : locks) + omp_destroy_lock(&l); + + // prune empty inner maps created by pre-init + for (int is_b = 0; is_b < nspin; ++is_b) + for (auto it = mRs_a2D[is_b].begin(); it != mRs_a2D[is_b].end();) + { + if (it->second.empty()) + it = mRs_a2D[is_b].erase(it); else - { // traverse kstar, ik means ik_ibz - for (auto& isym_kvd : kv.kstars[ik % ik_list.size()]) + ++it; + } + #endif + + ModuleBase::timer::end("RI_2D_Comm", "split_m2D_ktoR_gamma"); + return mRs_a2D; +} + +template +auto RI_2D_Comm::split_m2D_ktoR_k(const UnitCell& ucell, + const K_Vectors& kv, + const std::vector& mks_2D, + const Parallel_2D& pv, + const int nspin, + const bool spgsym) +-> std::vector>>> +{ + ModuleBase::TITLE("RI_2D_Comm","split_m2D_ktoR_k"); + ModuleBase::timer::start("RI_2D_Comm", "split_m2D_ktoR_k"); + + const TC period = RI_Util::get_Born_vonKarmen_period(kv); + const std::map nspin_k = {{1,1}, {2,2}, {4,1}}; + const double SPIN_multiple = std::map{ {1,0.5}, {2,1}, {4,1} }.at(nspin); // why? + + std::vector>>> mRs_a2D(nspin); + #ifdef _OPENMP + #pragma omp parallel + #endif + { + std::vector>>> mRs_a2D_thread(nspin); + for (int is_k = 0; is_k < nspin_k.at(nspin); ++is_k) + { + const std::vector ik_list = RI_2D_Comm::get_ik_list(kv, is_k); + const auto cells = RI_Util::get_Born_von_Karmen_cells(period); + #pragma omp for schedule(dynamic) + for (size_t icell = 0; icell < cells.size(); ++icell) + { + const TC& cell = cells[icell]; + RI::Tensor mR_2D; + int ik_full = 0; + for (const int ik : ik_list) + { + auto set_mR_2D = [&mR_2D](auto&& mk_frac) + { + if (mR_2D.empty()) + { mR_2D = RI::Global_Func::convert(mk_frac); } + else + { mR_2D = mR_2D + RI::Global_Func::convert(mk_frac); } + }; + using Tdata_m = typename Tmatrix::value_type; + if (!spgsym) { - RI::Tensor mk_2D = RI_Util::Vector_to_Tensor(*mks_2D[ik_full + is_k * kv.get_nkstot_full()], pv.get_col_size(), pv.get_row_size()); + RI::Tensor mk_2D = RI_Util::Vector_to_Tensor(*mks_2D[ik], pv.get_col_size(), pv.get_row_size()); const Tdata_m frac = SPIN_multiple * RI::Global_Func::convert(std::exp( - -ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * ((isym_kvd.second * ucell.G) * (RI_Util::array3_to_Vector3(cell) * ucell.latvec)))); - set_mR_2D(mk_2D * frac); - ++ik_full; + -ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell) * ucell.latvec)))); + if (static_cast(std::round(SPIN_multiple * kv.wk[ik] * kv.get_nkstot_full())) == 2) + { set_mR_2D(mk_2D * (frac * 0.5) + tensor_conj(mk_2D * (frac * 0.5))); } + else + { set_mR_2D(mk_2D * frac); } } - } - } - for(int iwt0_2D=0; iwt0_2D!=mR_2D.shape[0]; ++iwt0_2D) - { - const int iwt0 =ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) - ? pv.local2global_col(iwt0_2D) - : pv.local2global_row(iwt0_2D); - int iat0, iw0_b, is0_b; - std::tie(iat0,iw0_b,is0_b) = RI_2D_Comm::get_iat_iw_is_block(ucell,iwt0); - const int it0 = ucell.iat2it[iat0]; - for(int iwt1_2D=0; iwt1_2D!=mR_2D.shape[1]; ++iwt1_2D) - { - const int iwt1 =ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) - ? pv.local2global_row(iwt1_2D) - : pv.local2global_col(iwt1_2D); - int iat1, iw1_b, is1_b; - std::tie(iat1,iw1_b,is1_b) = RI_2D_Comm::get_iat_iw_is_block(ucell,iwt1); - const int it1 = ucell.iat2it[iat1]; - - const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b); - RI::Tensor &mR_a2D = mRs_a2D[is_b][iat0][{iat1,cell}]; - if (mR_a2D.empty()) { - mR_a2D = RI::Tensor( - {static_cast(ucell.atoms[it0].nw), - static_cast( - ucell.atoms[it1].nw)}); + else + { // traverse kstar, ik means ik_ibz + for (auto& isym_kvd : kv.kstars[ik % ik_list.size()]) + { + RI::Tensor mk_2D = RI_Util::Vector_to_Tensor(*mks_2D[ik_full + is_k * kv.get_nkstot_full()], pv.get_col_size(), pv.get_row_size()); + const Tdata_m frac = SPIN_multiple + * RI::Global_Func::convert(std::exp( + -ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * ((isym_kvd.second * ucell.G) * (RI_Util::array3_to_Vector3(cell) * ucell.latvec)))); + set_mR_2D(mk_2D * frac); + ++ik_full; + } + } + } // end for ik + for(int iwt0_2D=0; iwt0_2D!=mR_2D.shape[0]; ++iwt0_2D) + { + const int iwt0 =ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) + ? pv.local2global_col(iwt0_2D) + : pv.local2global_row(iwt0_2D); + int iat0, iw0_b, is0_b; + std::tie(iat0,iw0_b,is0_b) = RI_2D_Comm::get_iat_iw_is_block(ucell,iwt0); + const int it0 = ucell.iat2it[iat0]; + for(int iwt1_2D=0; iwt1_2D!=mR_2D.shape[1]; ++iwt1_2D) + { + const int iwt1 =ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) + ? pv.local2global_row(iwt1_2D) + : pv.local2global_col(iwt1_2D); + int iat1, iw1_b, is1_b; + std::tie(iat1,iw1_b,is1_b) = RI_2D_Comm::get_iat_iw_is_block(ucell,iwt1); + const int it1 = ucell.iat2it[iat1]; + + const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b); + RI::Tensor& mR_a2D = mRs_a2D_thread[is_b][iat0][{iat1, cell}]; + if (mR_a2D.empty()) + { + mR_a2D = RI::Tensor( + {static_cast(ucell.atoms[it0].nw), + static_cast(ucell.atoms[it1].nw)}); + } + mR_a2D(iw0_b, iw1_b) = mR_2D(iwt0_2D, iwt1_2D); + } // for iwt1_2D + } // end for iwt0_2D + } // end for icell + } // end for is_k + + #ifdef _OPENMP + #pragma omp critical + #endif + { + for(int is=0; is