Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 21 additions & 5 deletions source/source_lcao/module_ri/RI_2D_Comm.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,26 @@ using TAC = std::pair<TA, TC>;
template <typename Tdata, typename Tmatrix>
extern std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>> split_m2D_ktoR(
const UnitCell& ucell,
const K_Vectors& kv,
const std::vector<const Tmatrix*>& mks_2D,
const Parallel_2D& pv,
const int nspin,
const K_Vectors& kv,
const std::vector<const Tmatrix*>& mks_2D,
const Parallel_2D& pv,
const int nspin,
const bool spgsym = false);

template <typename Tdata, typename Tmatrix>
extern std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>> split_m2D_ktoR_gamma(
const UnitCell& ucell,
const std::vector<const Tmatrix*>& mks_2D,
const Parallel_2D& pv,
const int nspin);

template <typename Tdata, typename Tmatrix>
extern std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>> split_m2D_ktoR_k(
const UnitCell& ucell,
const K_Vectors& kv,
const std::vector<const Tmatrix*>& mks_2D,
const Parallel_2D& pv,
const int nspin,
const bool spgsym = false);

// judge[is] = {s0, s1}
Expand Down Expand Up @@ -119,4 +135,4 @@ extern std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>> split_m2D_kto

#include "RI_2D_Comm.hpp"

#endif
#endif
271 changes: 205 additions & 66 deletions source/source_lcao/module_ri/RI_2D_Comm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
#include <string>
#include <stdexcept>

#ifdef _OPENMP
#include <omp.h>
#endif

inline RI::Tensor<double> tensor_conj(const RI::Tensor<double>& t) { return t; }
inline RI::Tensor<std::complex<double>> tensor_conj(const RI::Tensor<std::complex<double>>& t)
{
Expand All @@ -33,93 +37,228 @@ inline RI::Tensor<std::complex<double>> tensor_conj(const RI::Tensor<std::comple
}
template<typename Tdata, typename Tmatrix>
auto RI_2D_Comm::split_m2D_ktoR(const UnitCell& ucell,
const K_Vectors & kv,
const std::vector<const Tmatrix*>&mks_2D,
const Parallel_2D & pv,
const int nspin,
const K_Vectors & kv,
const std::vector<const Tmatrix*>&mks_2D,
const Parallel_2D & pv,
const int nspin,
const bool spgsym)
-> std::vector<std::map<TA,std::map<TAC,RI::Tensor<Tdata>>>>
{
ModuleBase::TITLE("RI_2D_Comm","split_m2D_ktoR");
ModuleBase::timer::start("RI_2D_Comm", "split_m2D_ktoR");

const TC period = RI_Util::get_Born_vonKarmen_period(kv);
std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>> mRs_a2D
= (period == TC{1, 1, 1})
? RI_2D_Comm::split_m2D_ktoR_gamma<Tdata, Tmatrix>(ucell, mks_2D, pv, nspin)
: RI_2D_Comm::split_m2D_ktoR_k<Tdata, Tmatrix>(ucell, kv, mks_2D, pv, nspin, spgsym);
ModuleBase::timer::end("RI_2D_Comm", "split_m2D_ktoR");
return mRs_a2D;
}

template<typename Tdata, typename Tmatrix>
auto RI_2D_Comm::split_m2D_ktoR_gamma(const UnitCell& ucell,
const std::vector<const Tmatrix*>& mks_2D,
const Parallel_2D& pv,
const int nspin)
-> std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>>
{
ModuleBase::TITLE("RI_2D_Comm","split_m2D_ktoR_gamma");
ModuleBase::timer::start("RI_2D_Comm", "split_m2D_ktoR_gamma");

const std::map<int,int> nspin_k = {{1,1}, {2,2}, {4,1}};
const double SPIN_multiple = std::map<int, double>{ {1,0.5}, {2,1}, {4,1} }.at(nspin); // why?
const TC cell = {0, 0, 0};

std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>> mRs_a2D(nspin);

#ifdef _OPENMP
// pre-init all outer maps mRs_a2D[is_b][iat] to avoid concurrent std::map rebalancing
for (int is_b = 0; is_b < nspin; ++is_b)
for (int iat0 = 0; iat0 < ucell.nat; ++iat0)
mRs_a2D[is_b][iat0];

std::vector<omp_lock_t> locks(ucell.nat);
for (auto& l : locks)
omp_init_lock(&l);
#endif

for (int is_k = 0; is_k < nspin_k.at(nspin); ++is_k)
{
const std::vector<int> ik_list = RI_2D_Comm::get_ik_list(kv, is_k);
for(const TC &cell : RI_Util::get_Born_von_Karmen_cells(period))
{
RI::Tensor<Tdata> mR_2D;
int ik_full = 0;
for (const int ik : ik_list)
{
using Tdata_m = typename Tmatrix::value_type;
RI::Tensor<Tdata_m> mk_2D
= RI_Util::Vector_to_Tensor<Tdata_m>(*mks_2D[is_k], pv.get_col_size(), pv.get_row_size());
const Tdata_m frac = RI::Global_Func::convert<Tdata_m>(SPIN_multiple);
RI::Tensor<Tdata> mR_2D = RI::Global_Func::convert<Tdata>(mk_2D * frac);

#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
for (int iwt0_2D = 0; iwt0_2D != mR_2D.shape[0]; ++iwt0_2D)
{
const int iwt0 = ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)
? pv.local2global_col(iwt0_2D)
: pv.local2global_row(iwt0_2D);
int iat0, iw0_b, is0_b;
std::tie(iat0, iw0_b, is0_b) = RI_2D_Comm::get_iat_iw_is_block(ucell, iwt0);
const int it0 = ucell.iat2it[iat0];
for (int iwt1_2D = 0; iwt1_2D != mR_2D.shape[1]; ++iwt1_2D)
{
auto set_mR_2D = [&mR_2D](auto&& mk_frac) {
if (mR_2D.empty()) {
mR_2D = RI::Global_Func::convert<Tdata>(mk_frac);
} else {
mR_2D
= mR_2D + RI::Global_Func::convert<Tdata>(mk_frac);
}
};
using Tdata_m = typename Tmatrix::value_type;
if (!spgsym)
const int iwt1 = ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)
? pv.local2global_row(iwt1_2D)
: pv.local2global_col(iwt1_2D);
int iat1, iw1_b, is1_b;
std::tie(iat1, iw1_b, is1_b) = RI_2D_Comm::get_iat_iw_is_block(ucell, iwt1);
const int it1 = ucell.iat2it[iat1];

const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b);
#ifdef _OPENMP
omp_set_lock(&locks[iat0]);
#endif
RI::Tensor<Tdata>& mR_a2D = mRs_a2D[is_b][iat0][{iat1, cell}];
if (mR_a2D.empty())
{
RI::Tensor<Tdata_m> mk_2D = RI_Util::Vector_to_Tensor<Tdata_m>(*mks_2D[ik], pv.get_col_size(), pv.get_row_size());
const Tdata_m frac = SPIN_multiple
* RI::Global_Func::convert<Tdata_m>(std::exp(
-ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell) * ucell.latvec))));
if (static_cast<int>(std::round(SPIN_multiple * kv.wk[ik] * kv.get_nkstot_full())) == 2)
{ set_mR_2D(mk_2D * (frac * 0.5) + tensor_conj(mk_2D * (frac * 0.5))); }
else { set_mR_2D(mk_2D * frac); }
mR_a2D = RI::Tensor<Tdata>(
{static_cast<size_t>(ucell.atoms[it0].nw),
static_cast<size_t>(ucell.atoms[it1].nw)});
}
mR_a2D(iw0_b, iw1_b) = mR_2D(iwt0_2D, iwt1_2D);
#ifdef _OPENMP
omp_unset_lock(&locks[iat0]);
#endif
}
}
}

#ifdef _OPENMP
for (auto& l : locks)
omp_destroy_lock(&l);

// prune empty inner maps created by pre-init
for (int is_b = 0; is_b < nspin; ++is_b)
for (auto it = mRs_a2D[is_b].begin(); it != mRs_a2D[is_b].end();)
{
if (it->second.empty())
it = mRs_a2D[is_b].erase(it);
else
{ // traverse kstar, ik means ik_ibz
for (auto& isym_kvd : kv.kstars[ik % ik_list.size()])
++it;
}
#endif

ModuleBase::timer::end("RI_2D_Comm", "split_m2D_ktoR_gamma");
return mRs_a2D;
}

template<typename Tdata, typename Tmatrix>
auto RI_2D_Comm::split_m2D_ktoR_k(const UnitCell& ucell,
const K_Vectors& kv,
const std::vector<const Tmatrix*>& mks_2D,
const Parallel_2D& pv,
const int nspin,
const bool spgsym)
-> std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>>
{
ModuleBase::TITLE("RI_2D_Comm","split_m2D_ktoR_k");
ModuleBase::timer::start("RI_2D_Comm", "split_m2D_ktoR_k");

const TC period = RI_Util::get_Born_vonKarmen_period(kv);
const std::map<int,int> nspin_k = {{1,1}, {2,2}, {4,1}};
const double SPIN_multiple = std::map<int, double>{ {1,0.5}, {2,1}, {4,1} }.at(nspin); // why?

std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>> mRs_a2D(nspin);
#ifdef _OPENMP
#pragma omp parallel
#endif
{
std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>> mRs_a2D_thread(nspin);
for (int is_k = 0; is_k < nspin_k.at(nspin); ++is_k)
{
const std::vector<int> ik_list = RI_2D_Comm::get_ik_list(kv, is_k);
const auto cells = RI_Util::get_Born_von_Karmen_cells(period);
#pragma omp for schedule(dynamic)
for (size_t icell = 0; icell < cells.size(); ++icell)
{
const TC& cell = cells[icell];
RI::Tensor<Tdata> mR_2D;
int ik_full = 0;
for (const int ik : ik_list)
{
auto set_mR_2D = [&mR_2D](auto&& mk_frac)
{
if (mR_2D.empty())
{ mR_2D = RI::Global_Func::convert<Tdata>(mk_frac); }
else
{ mR_2D = mR_2D + RI::Global_Func::convert<Tdata>(mk_frac); }
};
using Tdata_m = typename Tmatrix::value_type;
if (!spgsym)
{
RI::Tensor<Tdata_m> mk_2D = RI_Util::Vector_to_Tensor<Tdata_m>(*mks_2D[ik_full + is_k * kv.get_nkstot_full()], pv.get_col_size(), pv.get_row_size());
RI::Tensor<Tdata_m> mk_2D = RI_Util::Vector_to_Tensor<Tdata_m>(*mks_2D[ik], pv.get_col_size(), pv.get_row_size());
const Tdata_m frac = SPIN_multiple
* RI::Global_Func::convert<Tdata_m>(std::exp(
-ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * ((isym_kvd.second * ucell.G) * (RI_Util::array3_to_Vector3(cell) * ucell.latvec))));
set_mR_2D(mk_2D * frac);
++ik_full;
-ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell) * ucell.latvec))));
if (static_cast<int>(std::round(SPIN_multiple * kv.wk[ik] * kv.get_nkstot_full())) == 2)
{ set_mR_2D(mk_2D * (frac * 0.5) + tensor_conj(mk_2D * (frac * 0.5))); }
else
{ set_mR_2D(mk_2D * frac); }
}
}
}
for(int iwt0_2D=0; iwt0_2D!=mR_2D.shape[0]; ++iwt0_2D)
{
const int iwt0 =ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)
? pv.local2global_col(iwt0_2D)
: pv.local2global_row(iwt0_2D);
int iat0, iw0_b, is0_b;
std::tie(iat0,iw0_b,is0_b) = RI_2D_Comm::get_iat_iw_is_block(ucell,iwt0);
const int it0 = ucell.iat2it[iat0];
for(int iwt1_2D=0; iwt1_2D!=mR_2D.shape[1]; ++iwt1_2D)
{
const int iwt1 =ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)
? pv.local2global_row(iwt1_2D)
: pv.local2global_col(iwt1_2D);
int iat1, iw1_b, is1_b;
std::tie(iat1,iw1_b,is1_b) = RI_2D_Comm::get_iat_iw_is_block(ucell,iwt1);
const int it1 = ucell.iat2it[iat1];

const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b);
RI::Tensor<Tdata> &mR_a2D = mRs_a2D[is_b][iat0][{iat1,cell}];
if (mR_a2D.empty()) {
mR_a2D = RI::Tensor<Tdata>(
{static_cast<size_t>(ucell.atoms[it0].nw),
static_cast<size_t>(
ucell.atoms[it1].nw)});
else
{ // traverse kstar, ik means ik_ibz
for (auto& isym_kvd : kv.kstars[ik % ik_list.size()])
{
RI::Tensor<Tdata_m> mk_2D = RI_Util::Vector_to_Tensor<Tdata_m>(*mks_2D[ik_full + is_k * kv.get_nkstot_full()], pv.get_col_size(), pv.get_row_size());
const Tdata_m frac = SPIN_multiple
* RI::Global_Func::convert<Tdata_m>(std::exp(
-ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * ((isym_kvd.second * ucell.G) * (RI_Util::array3_to_Vector3(cell) * ucell.latvec))));
set_mR_2D(mk_2D * frac);
++ik_full;
}
}
} // end for ik
for(int iwt0_2D=0; iwt0_2D!=mR_2D.shape[0]; ++iwt0_2D)
{
const int iwt0 =ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)
? pv.local2global_col(iwt0_2D)
: pv.local2global_row(iwt0_2D);
int iat0, iw0_b, is0_b;
std::tie(iat0,iw0_b,is0_b) = RI_2D_Comm::get_iat_iw_is_block(ucell,iwt0);
const int it0 = ucell.iat2it[iat0];
for(int iwt1_2D=0; iwt1_2D!=mR_2D.shape[1]; ++iwt1_2D)
{
const int iwt1 =ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver)
? pv.local2global_row(iwt1_2D)
: pv.local2global_col(iwt1_2D);
int iat1, iw1_b, is1_b;
std::tie(iat1,iw1_b,is1_b) = RI_2D_Comm::get_iat_iw_is_block(ucell,iwt1);
const int it1 = ucell.iat2it[iat1];

const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b);
RI::Tensor<Tdata>& mR_a2D = mRs_a2D_thread[is_b][iat0][{iat1, cell}];
if (mR_a2D.empty())
{
mR_a2D = RI::Tensor<Tdata>(
{static_cast<size_t>(ucell.atoms[it0].nw),
static_cast<size_t>(ucell.atoms[it1].nw)});
}
mR_a2D(iw0_b, iw1_b) = mR_2D(iwt0_2D, iwt1_2D);
} // for iwt1_2D
} // end for iwt0_2D
} // end for icell
} // end for is_k

#ifdef _OPENMP
#pragma omp critical
#endif
{
for(int is=0; is<nspin; ++is)
for(auto &mRs_A : mRs_a2D_thread[is])
for(auto &mRs_B : mRs_A.second)
{
assert(mRs_a2D[is][mRs_A.first][mRs_B.first].empty());
mRs_a2D[is][mRs_A.first][mRs_B.first] = std::move(mRs_B.second);
}
mR_a2D(iw0_b,iw1_b) = mR_2D(iwt0_2D, iwt1_2D);
}
}
}
}
ModuleBase::timer::end("RI_2D_Comm", "split_m2D_ktoR");
} // end #pragma omp parallel
ModuleBase::timer::end("RI_2D_Comm", "split_m2D_ktoR_k");
return mRs_a2D;
}

Expand Down
Loading