From 65962212333957223d15a7a7346f290a27a49654 Mon Sep 17 00:00:00 2001 From: Growl Date: Sun, 31 May 2026 01:28:11 +0800 Subject: [PATCH 01/10] Add WFN extrapolation option and NAO history buffer --- .../module_parameter/input_parameter.h | 1 + .../read_input_item_system.cpp | 41 ++++++++ source/source_lcao/CMakeLists.txt | 1 + .../source_lcao/module_extrap/CMakeLists.txt | 13 +++ .../module_extrap/wf_extrap_method.h | 38 +++++++ .../module_extrap/wf_history_lcao.cpp | 99 +++++++++++++++++++ .../module_extrap/wf_history_lcao.h | 46 +++++++++ .../module_extrap/wf_snapshot_lcao.h | 81 +++++++++++++++ 8 files changed, 320 insertions(+) create mode 100644 source/source_lcao/module_extrap/CMakeLists.txt create mode 100644 source/source_lcao/module_extrap/wf_extrap_method.h create mode 100644 source/source_lcao/module_extrap/wf_history_lcao.cpp create mode 100644 source/source_lcao/module_extrap/wf_history_lcao.h create mode 100644 source/source_lcao/module_extrap/wf_snapshot_lcao.h diff --git a/source/source_io/module_parameter/input_parameter.h b/source/source_io/module_parameter/input_parameter.h index 6fac46679ec..af720569130 100644 --- a/source/source_io/module_parameter/input_parameter.h +++ b/source/source_io/module_parameter/input_parameter.h @@ -49,6 +49,7 @@ struct Input_para std::string init_chg = "atomic"; ///< "file","atomic" bool dm_to_rho = false; ///< read density matrix from npz format and calculate charge density std::string chg_extrap = "default"; ///< xiaohui modify 2015-02-01 + std::string wfc_extrap = "none"; ///< wavefunction extrapolation method for LCAO calculations bool init_vel = false; ///< read velocity from STRU or not liuyu 2021-07-14 std::string input_file = "INPUT"; ///< input file name diff --git a/source/source_io/module_parameter/read_input_item_system.cpp b/source/source_io/module_parameter/read_input_item_system.cpp index d746d2b5b12..74463295ba2 100644 --- a/source/source_io/module_parameter/read_input_item_system.cpp +++ b/source/source_io/module_parameter/read_input_item_system.cpp @@ -859,6 +859,47 @@ Available options are: }; this->add_item(item); } + { + Input_Item item("wfc_extrap"); + item.annotation = "none; use_prev_wf"; + item.category = "System variables"; + item.type = "String"; + item.description = R"(Wavefunction extrapolation method for LCAO calculations. + +* none: Disable wavefunction-based extrapolation. +* use_prev_wf: Use the previous ionic step wavefunctions as the initial guess. + +This option is currently limited to Gamma-only LCAO calculations. +The k-point, ASPC, and GExt_PROJ paths will be enabled by later updates.)"; + item.default_value = "none"; + read_sync_string(input.wfc_extrap); + item.check_value = [](const Input_Item& item, const Parameter& para) { + if (para.input.wfc_extrap != "none" && para.input.wfc_extrap != "use_prev_wf") + { + ModuleBase::WARNING_QUIT("ReadInput", "wfc_extrap should be none or use_prev_wf"); + } + if (para.input.wfc_extrap == "use_prev_wf") + { + if (para.input.basis_type != "lcao") + { + ModuleBase::WARNING_QUIT("ReadInput", "WFN-based extrapolation is available only for LCAO"); + } + if (!para.input.gamma_only) + { + ModuleBase::WARNING_QUIT("ReadInput", "WFN-based extrapolation is not implemented for k-points"); + } + if (para.input.esolver_type == "tddft") + { + ModuleBase::WARNING_QUIT("ReadInput", "WFN-based extrapolation is not implemented for tddft"); + } + if (para.input.nspin == 4) + { + ModuleBase::WARNING_QUIT("ReadInput", "WFN-based extrapolation is not implemented for nspin = 4"); + } + } + }; + this->add_item(item); + } { Input_Item item("ecutwfc"); item.annotation = "energy cutoff for wave functions"; diff --git a/source/source_lcao/CMakeLists.txt b/source/source_lcao/CMakeLists.txt index a793f5d5d02..480eed0b476 100644 --- a/source/source_lcao/CMakeLists.txt +++ b/source/source_lcao/CMakeLists.txt @@ -6,6 +6,7 @@ add_subdirectory(module_deltaspin) if(ENABLE_LCAO) add_subdirectory(module_operator_lcao) + add_subdirectory(module_extrap) list(APPEND objects hamilt_lcao.cpp module_operator_lcao/operator_lcao.cpp diff --git a/source/source_lcao/module_extrap/CMakeLists.txt b/source/source_lcao/module_extrap/CMakeLists.txt new file mode 100644 index 00000000000..f3574b62ad0 --- /dev/null +++ b/source/source_lcao/module_extrap/CMakeLists.txt @@ -0,0 +1,13 @@ +list(APPEND objects + wf_history_lcao.cpp +) + +add_library( + lcao_extrap + OBJECT + ${objects} +) + +if(ENABLE_COVERAGE) + add_coverage(lcao_extrap) +endif() diff --git a/source/source_lcao/module_extrap/wf_extrap_method.h b/source/source_lcao/module_extrap/wf_extrap_method.h new file mode 100644 index 00000000000..475aac4a0d2 --- /dev/null +++ b/source/source_lcao/module_extrap/wf_extrap_method.h @@ -0,0 +1,38 @@ +#ifndef SOURCE_LCAO_MODULE_EXTRAP_WF_EXTRAP_METHOD_H +#define SOURCE_LCAO_MODULE_EXTRAP_WF_EXTRAP_METHOD_H + +#include + +namespace ModuleExtrap +{ + +enum class WfcExtrapMethod +{ + None, + UsePrevWf +}; + +inline WfcExtrapMethod wfc_extrap_method_from_string(const std::string& method) +{ + if (method == "use_prev_wf") + { + return WfcExtrapMethod::UsePrevWf; + } + return WfcExtrapMethod::None; +} + +inline const char* to_string(const WfcExtrapMethod method) noexcept +{ + switch (method) + { + case WfcExtrapMethod::UsePrevWf: + return "use_prev_wf"; + case WfcExtrapMethod::None: + default: + return "none"; + } +} + +} // namespace ModuleExtrap + +#endif // SOURCE_LCAO_MODULE_EXTRAP_WF_EXTRAP_METHOD_H diff --git a/source/source_lcao/module_extrap/wf_history_lcao.cpp b/source/source_lcao/module_extrap/wf_history_lcao.cpp new file mode 100644 index 00000000000..8a258311f0a --- /dev/null +++ b/source/source_lcao/module_extrap/wf_history_lcao.cpp @@ -0,0 +1,99 @@ +#include "source_lcao/module_extrap/wf_history_lcao.h" + +#include +#include + +namespace ModuleExtrap +{ + +template +WfHistoryLCAO::WfHistoryLCAO(const WfcExtrapMethod method, const std::size_t max_depth) + : method_(method), max_depth_(std::max(max_depth, 1)) +{ +} + +template +WfcExtrapMethod WfHistoryLCAO::method() const noexcept +{ + return this->method_; +} + +template +bool WfHistoryLCAO::enabled() const noexcept +{ + return this->method_ != WfcExtrapMethod::None; +} + +template +std::size_t WfHistoryLCAO::size() const noexcept +{ + return this->snapshots_.size(); +} + +template +std::size_t WfHistoryLCAO::max_depth() const noexcept +{ + return this->max_depth_; +} + +template +bool WfHistoryLCAO::empty() const noexcept +{ + return this->snapshots_.empty(); +} + +template +void WfHistoryLCAO::set_method(const WfcExtrapMethod method) noexcept +{ + this->method_ = method; +} + +template +void WfHistoryLCAO::set_max_depth(const std::size_t max_depth) +{ + this->max_depth_ = std::max(max_depth, 1); + this->prune_history(); +} + +template +void WfHistoryLCAO::clear() noexcept +{ + this->snapshots_.clear(); +} + +template +void WfHistoryLCAO::update_after_scf(const int istep, const psi::Psi& psi, const ModuleBase::matrix& wg) +{ + if (!this->enabled()) + { + return; + } + + this->snapshots_.emplace_back(istep, psi, wg); + this->prune_history(); +} + +template +bool WfHistoryLCAO::latest_snapshot(WfSnapshotLCAO& snapshot) const +{ + if (this->snapshots_.empty()) + { + return false; + } + snapshot = this->snapshots_.back(); + return true; +} + +template +void WfHistoryLCAO::prune_history() +{ + while (this->snapshots_.size() > this->max_depth_) + { + this->snapshots_.pop_front(); + } +} + +template class WfHistoryLCAO; +template class WfHistoryLCAO>; + +} // namespace ModuleExtrap diff --git a/source/source_lcao/module_extrap/wf_history_lcao.h b/source/source_lcao/module_extrap/wf_history_lcao.h new file mode 100644 index 00000000000..1e7799619aa --- /dev/null +++ b/source/source_lcao/module_extrap/wf_history_lcao.h @@ -0,0 +1,46 @@ +#ifndef SOURCE_LCAO_MODULE_EXTRAP_WF_HISTORY_LCAO_H +#define SOURCE_LCAO_MODULE_EXTRAP_WF_HISTORY_LCAO_H + +#include "source_lcao/module_extrap/wf_extrap_method.h" +#include "source_lcao/module_extrap/wf_snapshot_lcao.h" + +#include +#include + +namespace ModuleExtrap +{ + +template +class WfHistoryLCAO +{ + public: + explicit WfHistoryLCAO(WfcExtrapMethod method = WfcExtrapMethod::None, std::size_t max_depth = 1); + + WfcExtrapMethod method() const noexcept; + bool enabled() const noexcept; + + std::size_t size() const noexcept; + std::size_t max_depth() const noexcept; + bool empty() const noexcept; + + void set_method(WfcExtrapMethod method) noexcept; + void set_max_depth(std::size_t max_depth); + void clear() noexcept; + + void update_after_scf(const int istep, const psi::Psi& psi, const ModuleBase::matrix& wg); + + // Return a copy of the most recent snapshot. This avoids exposing an internal + // pointer/reference whose lifetime would be invalidated by the next history update. + bool latest_snapshot(WfSnapshotLCAO& snapshot) const; + + private: + void prune_history(); + + WfcExtrapMethod method_ = WfcExtrapMethod::None; + std::size_t max_depth_ = 1; + std::deque> snapshots_; +}; + +} // namespace ModuleExtrap + +#endif // SOURCE_LCAO_MODULE_EXTRAP_WF_HISTORY_LCAO_H diff --git a/source/source_lcao/module_extrap/wf_snapshot_lcao.h b/source/source_lcao/module_extrap/wf_snapshot_lcao.h new file mode 100644 index 00000000000..b95d66090be --- /dev/null +++ b/source/source_lcao/module_extrap/wf_snapshot_lcao.h @@ -0,0 +1,81 @@ +#ifndef SOURCE_LCAO_MODULE_EXTRAP_WF_SNAPSHOT_LCAO_H +#define SOURCE_LCAO_MODULE_EXTRAP_WF_SNAPSHOT_LCAO_H + +#include "source_base/matrix.h" +#include "source_psi/psi.h" + +#include +#include + +namespace ModuleExtrap +{ + +template +struct WfSnapshotLCAO +{ + int istep = -1; + int nstate = 0; + int nbands = 0; + int nbasis = 0; + bool k_first = true; + + std::vector coeff; + ModuleBase::matrix wg; + + WfSnapshotLCAO() = default; + + WfSnapshotLCAO(const int istep_in, const psi::Psi& psi_in, const ModuleBase::matrix& wg_in) + { + this->save(istep_in, psi_in, wg_in); + } + + void save(const int istep_in, const psi::Psi& psi_in, const ModuleBase::matrix& wg_in) + { + this->istep = istep_in; + this->nstate = psi_in.get_nk(); + this->nbands = psi_in.get_nbands(); + this->nbasis = psi_in.get_nbasis(); + this->k_first = psi_in.get_k_first(); + this->wg = wg_in; + + // Psi::get_pointer() returns the pointer at the current fix_k()/fix_b() position. + // Subtract psi_bias to copy the full owned coefficient array from its real beginning. + const TK* coeff_begin = psi_in.get_pointer() - psi_in.get_psi_bias(); + this->coeff.assign(coeff_begin, coeff_begin + psi_in.size()); + } + + bool empty() const noexcept + { + return this->coeff.empty(); + } + + std::size_t size() const noexcept + { + return this->coeff.size(); + } + + bool compatible_with(const psi::Psi& psi_now, const ModuleBase::matrix& wg_now) const noexcept + { + return this->nstate == psi_now.get_nk() + && this->nbands == psi_now.get_nbands() + && this->nbasis == psi_now.get_nbasis() + && this->k_first == psi_now.get_k_first() + && this->coeff.size() == psi_now.size() + && this->wg.nr == wg_now.nr + && this->wg.nc == wg_now.nc; + } + + bool load_to(psi::Psi& psi_out) const + { + if (this->coeff.size() != psi_out.size()) + { + return false; + } + psi_out.set_all_psi(this->coeff.data(), this->coeff.size()); + return true; + } +}; + +} // namespace ModuleExtrap + +#endif // SOURCE_LCAO_MODULE_EXTRAP_WF_SNAPSHOT_LCAO_H From 2484c5a4766f8e0c545d0fc76f84155255d9b438 Mon Sep 17 00:00:00 2001 From: Growl Date: Sun, 31 May 2026 01:50:49 +0800 Subject: [PATCH 02/10] Add Gamma-only S-orthonormalization for NAO WFN extrapolation --- .../source_lcao/module_extrap/CMakeLists.txt | 1 + .../module_extrap/wf_extrap_method.h | 28 +++ .../module_extrap/wf_orthonormalize_lcao.cpp | 230 ++++++++++++++++++ .../module_extrap/wf_orthonormalize_lcao.h | 49 ++++ 4 files changed, 308 insertions(+) create mode 100644 source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp create mode 100644 source/source_lcao/module_extrap/wf_orthonormalize_lcao.h diff --git a/source/source_lcao/module_extrap/CMakeLists.txt b/source/source_lcao/module_extrap/CMakeLists.txt index f3574b62ad0..fcf83f56ff4 100644 --- a/source/source_lcao/module_extrap/CMakeLists.txt +++ b/source/source_lcao/module_extrap/CMakeLists.txt @@ -1,5 +1,6 @@ list(APPEND objects wf_history_lcao.cpp + wf_orthonormalize_lcao.cpp ) add_library( diff --git a/source/source_lcao/module_extrap/wf_extrap_method.h b/source/source_lcao/module_extrap/wf_extrap_method.h index 475aac4a0d2..a5d87227871 100644 --- a/source/source_lcao/module_extrap/wf_extrap_method.h +++ b/source/source_lcao/module_extrap/wf_extrap_method.h @@ -12,6 +12,15 @@ enum class WfcExtrapMethod UsePrevWf }; +enum class WfcExtrapStatus +{ + Success, + DimensionMismatch, + InvalidInput, + Unsupported, + OrthogonalizationFailed +}; + inline WfcExtrapMethod wfc_extrap_method_from_string(const std::string& method) { if (method == "use_prev_wf") @@ -33,6 +42,25 @@ inline const char* to_string(const WfcExtrapMethod method) noexcept } } +inline const char* to_string(const WfcExtrapStatus status) noexcept +{ + switch (status) + { + case WfcExtrapStatus::Success: + return "success"; + case WfcExtrapStatus::DimensionMismatch: + return "dimension_mismatch"; + case WfcExtrapStatus::InvalidInput: + return "invalid_input"; + case WfcExtrapStatus::Unsupported: + return "unsupported"; + case WfcExtrapStatus::OrthogonalizationFailed: + return "orthogonalization_failed"; + default: + return "unknown"; + } +} + } // namespace ModuleExtrap #endif // SOURCE_LCAO_MODULE_EXTRAP_WF_EXTRAP_METHOD_H diff --git a/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp b/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp new file mode 100644 index 00000000000..4b590387342 --- /dev/null +++ b/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp @@ -0,0 +1,230 @@ +#include "source_lcao/module_extrap/wf_orthonormalize_lcao.h" + +#include "source_base/module_external/blas_connector.h" + +#include +#include +#include + +namespace ModuleExtrap +{ +namespace +{ + +inline std::size_t idx(const int row, const int col, const int ncol) noexcept +{ + return static_cast(row) * static_cast(ncol) + static_cast(col); +} + +void build_overlap_in_band_space(const double* overlap, + const double* coeff, + const int nbands, + const int nbasis, + std::vector& metric) +{ + metric.assign(static_cast(nbands) * static_cast(nbands), 0.0); + std::vector coeff_s(static_cast(nbands) * static_cast(nbasis), 0.0); + + const double one = 1.0; + const double zero = 0.0; + + // Coefficients are stored as a row-major band-by-basis matrix X. With the + // current row-major overlap S, the band-space metric is O = X S X^T. + BlasConnector::gemm('N', + 'N', + nbands, + nbasis, + nbasis, + one, + coeff, + nbasis, + overlap, + nbasis, + zero, + coeff_s.data(), + nbasis); + + BlasConnector::gemm('N', + 'T', + nbands, + nbands, + nbasis, + one, + coeff_s.data(), + nbasis, + coeff, + nbasis, + zero, + metric.data(), + nbands); + + // Remove tiny asymmetry introduced by BLAS roundoff before the Cholesky step. + for (int i = 0; i < nbands; ++i) + { + for (int j = 0; j < i; ++j) + { + const double value = 0.5 * (metric[idx(i, j, nbands)] + metric[idx(j, i, nbands)]); + metric[idx(i, j, nbands)] = value; + metric[idx(j, i, nbands)] = value; + } + } +} + +bool cholesky_lower_in_place(std::vector& metric, const int n, const double pivot_threshold) +{ + for (int j = 0; j < n; ++j) + { + double diag = metric[idx(j, j, n)]; + for (int k = 0; k < j; ++k) + { + const double ljk = metric[idx(j, k, n)]; + diag -= ljk * ljk; + } + + if (!(diag > pivot_threshold) || !std::isfinite(diag)) + { + return false; + } + + const double ljj = std::sqrt(diag); + metric[idx(j, j, n)] = ljj; + + for (int i = j + 1; i < n; ++i) + { + double value = metric[idx(i, j, n)]; + for (int k = 0; k < j; ++k) + { + value -= metric[idx(i, k, n)] * metric[idx(j, k, n)]; + } + metric[idx(i, j, n)] = value / ljj; + } + + for (int k = j + 1; k < n; ++k) + { + metric[idx(j, k, n)] = 0.0; + } + } + return true; +} + +void apply_inverse_cholesky_left(const std::vector& lower, + double* coeff, + const int nbands, + const int nbasis) +{ + std::vector column(static_cast(nbands), 0.0); + + // Solve L * C_new(:,mu) = C_old(:,mu) for every basis function mu. + for (int mu = 0; mu < nbasis; ++mu) + { + for (int ib = 0; ib < nbands; ++ib) + { + double value = coeff[idx(ib, mu, nbasis)]; + for (int jb = 0; jb < ib; ++jb) + { + value -= lower[idx(ib, jb, nbands)] * column[jb]; + } + column[ib] = value / lower[idx(ib, ib, nbands)]; + } + + for (int ib = 0; ib < nbands; ++ib) + { + coeff[idx(ib, mu, nbasis)] = column[ib]; + } + } +} + +double max_orthonormality_deviation(const double* overlap, + const double* coeff, + const int nbands, + const int nbasis) +{ + std::vector metric; + build_overlap_in_band_space(overlap, coeff, nbands, nbasis, metric); + + double max_dev = 0.0; + for (int i = 0; i < nbands; ++i) + { + for (int j = 0; j < nbands; ++j) + { + const double expected = (i == j) ? 1.0 : 0.0; + max_dev = std::max(max_dev, std::abs(metric[idx(i, j, nbands)] - expected)); + } + } + return max_dev; +} + +} // namespace + +WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, + psi::Psi& wfc, + const double pivot_threshold, + const double check_tolerance) +{ + WfOrthonormalizeResult result; + + if (overlap == nullptr || !(pivot_threshold >= 0.0) || !(check_tolerance > 0.0)) + { + result.status = WfcExtrapStatus::InvalidInput; + return result; + } + + const int nstate = wfc.get_nk(); + const int nbands = wfc.get_nbands(); + const int nbasis = wfc.get_nbasis(); + + if (nstate <= 0 || nbands <= 0 || nbasis <= 0 || nbands > nbasis) + { + result.status = WfcExtrapStatus::DimensionMismatch; + return result; + } + + if (!wfc.get_k_first()) + { + result.status = WfcExtrapStatus::Unsupported; + return result; + } + + // Operate on a full owned copy first. The caller's Psi is overwritten only after + // all state/spin channels pass the positive-definiteness and residual checks. + std::vector trial(wfc.size(), 0.0); + const double* coeff_begin = wfc.get_pointer() - wfc.get_psi_bias(); + std::copy(coeff_begin, coeff_begin + wfc.size(), trial.begin()); + + double max_deviation_all = 0.0; + for (int istate = 0; istate < nstate; ++istate) + { + double* coeff = trial.data() + + static_cast(istate) * static_cast(nbands) + * static_cast(nbasis); + + std::vector metric; + build_overlap_in_band_space(overlap, coeff, nbands, nbasis, metric); + + if (!cholesky_lower_in_place(metric, nbands, pivot_threshold)) + { + result.status = WfcExtrapStatus::OrthogonalizationFailed; + result.failed_state = istate; + return result; + } + + apply_inverse_cholesky_left(metric, coeff, nbands, nbasis); + + const double max_dev = max_orthonormality_deviation(overlap, coeff, nbands, nbasis); + if (!std::isfinite(max_dev) || max_dev > check_tolerance) + { + result.status = WfcExtrapStatus::OrthogonalizationFailed; + result.failed_state = istate; + result.max_deviation = max_dev; + return result; + } + max_deviation_all = std::max(max_deviation_all, max_dev); + } + + wfc.set_all_psi(trial.data(), trial.size()); + result.status = WfcExtrapStatus::Success; + result.max_deviation = max_deviation_all; + return result; +} + +} // namespace ModuleExtrap diff --git a/source/source_lcao/module_extrap/wf_orthonormalize_lcao.h b/source/source_lcao/module_extrap/wf_orthonormalize_lcao.h new file mode 100644 index 00000000000..25842a8d5f7 --- /dev/null +++ b/source/source_lcao/module_extrap/wf_orthonormalize_lcao.h @@ -0,0 +1,49 @@ +#ifndef SOURCE_LCAO_MODULE_EXTRAP_WF_ORTHONORMALIZE_LCAO_H +#define SOURCE_LCAO_MODULE_EXTRAP_WF_ORTHONORMALIZE_LCAO_H + +#include "source_lcao/module_extrap/wf_extrap_method.h" +#include "source_psi/psi.h" + +namespace ModuleExtrap +{ + +struct WfOrthonormalizeResult +{ + WfcExtrapStatus status = WfcExtrapStatus::Success; + double max_deviation = 0.0; + int failed_state = -1; + + bool ok() const noexcept + { + return this->status == WfcExtrapStatus::Success; + } +}; + +/** + * Reorthonormalize Gamma-only NAO wavefunctions with the current overlap matrix. + * + * The input wavefunction coefficients are assumed to be stored as band-major rows, + * i.e. C(iband, ibasis) for the current Gamma/spin channel. For each channel this + * routine computes + * + * O = C S C^T + * + * and applies a Cholesky-based transformation C_new = L^{-1} C, where O = L L^T. + * Thus the updated coefficients satisfy C_new S C_new^T = I up to numerical noise. + * + * In Gamma-only LCAO, Psi::get_nk() may label spin channels rather than physical + * k-points. This routine therefore treats the first Psi dimension as a generic + * state/channel index. + * + * This is a dense serial utility for the first use_prev_wf implementation. The + * distributed/k-point path should be implemented separately instead of silently + * reusing this routine for block-cyclic matrices. + */ +WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, + psi::Psi& wfc, + double pivot_threshold = 1.0e-14, + double check_tolerance = 1.0e-8); + +} // namespace ModuleExtrap + +#endif // SOURCE_LCAO_MODULE_EXTRAP_WF_ORTHONORMALIZE_LCAO_H From df873e281dbb38182a47ec764d6bf4a2ed195f17 Mon Sep 17 00:00:00 2001 From: Growl Date: Sun, 31 May 2026 02:02:46 +0800 Subject: [PATCH 03/10] Implement WfHistoryLCAO main apply logic --- .../module_extrap/wf_extrap_method.h | 6 ++ .../module_extrap/wf_history_lcao.cpp | 83 +++++++++++++++++++ .../module_extrap/wf_history_lcao.h | 32 +++++++ 3 files changed, 121 insertions(+) diff --git a/source/source_lcao/module_extrap/wf_extrap_method.h b/source/source_lcao/module_extrap/wf_extrap_method.h index a5d87227871..235c3151a59 100644 --- a/source/source_lcao/module_extrap/wf_extrap_method.h +++ b/source/source_lcao/module_extrap/wf_extrap_method.h @@ -15,6 +15,8 @@ enum class WfcExtrapMethod enum class WfcExtrapStatus { Success, + Disabled, + EmptyHistory, DimensionMismatch, InvalidInput, Unsupported, @@ -48,6 +50,10 @@ inline const char* to_string(const WfcExtrapStatus status) noexcept { case WfcExtrapStatus::Success: return "success"; + case WfcExtrapStatus::Disabled: + return "disabled"; + case WfcExtrapStatus::EmptyHistory: + return "empty_history"; case WfcExtrapStatus::DimensionMismatch: return "dimension_mismatch"; case WfcExtrapStatus::InvalidInput: diff --git a/source/source_lcao/module_extrap/wf_history_lcao.cpp b/source/source_lcao/module_extrap/wf_history_lcao.cpp index 8a258311f0a..1fb457a1748 100644 --- a/source/source_lcao/module_extrap/wf_history_lcao.cpp +++ b/source/source_lcao/module_extrap/wf_history_lcao.cpp @@ -1,7 +1,10 @@ #include "source_lcao/module_extrap/wf_history_lcao.h" +#include "source_lcao/module_extrap/wf_orthonormalize_lcao.h" + #include #include +#include namespace ModuleExtrap { @@ -84,6 +87,86 @@ bool WfHistoryLCAO::latest_snapshot(WfSnapshotLCAO& snapshot) const return true; } +template +WfExtrapApplyResult WfHistoryLCAO::try_use_prev_wf_gamma(const double* current_overlap, + psi::Psi& psi, + const ModuleBase::matrix& wg_now, + const double pivot_threshold, + const double check_tolerance) +{ + WfExtrapApplyResult result; + + if constexpr (!std::is_same::value) + { + result.status = WfcExtrapStatus::Unsupported; + return result; + } + else + { + if (this->method_ != WfcExtrapMethod::UsePrevWf) + { + result.status = WfcExtrapStatus::Disabled; + return result; + } + + if (this->snapshots_.empty()) + { + result.status = WfcExtrapStatus::EmptyHistory; + return result; + } + + if (current_overlap == nullptr || !(pivot_threshold >= 0.0) || !(check_tolerance > 0.0)) + { + result.status = WfcExtrapStatus::InvalidInput; + return result; + } + + // The first PR only supports the real Gamma-only path. In this path the + // first Psi dimension may label spin channels, but not physical k-points. + if (!psi.get_k_first()) + { + result.status = WfcExtrapStatus::Unsupported; + return result; + } + + const WfSnapshotLCAO& snapshot = this->snapshots_.back(); + result.snapshot_istep = snapshot.istep; + + if (!snapshot.compatible_with(psi, wg_now)) + { + result.status = WfcExtrapStatus::DimensionMismatch; + return result; + } + + // Work on an owned trial Psi first. The caller's Psi is not overwritten + // unless the stored WFN can be loaded and reorthonormalized successfully. + psi::Psi psi_trial(psi); + if (!snapshot.load_to(psi_trial)) + { + result.status = WfcExtrapStatus::DimensionMismatch; + return result; + } + + const WfOrthonormalizeResult orth_result = reorthonormalize_gamma_lcao(current_overlap, + psi_trial, + pivot_threshold, + check_tolerance); + if (!orth_result.ok()) + { + result.status = orth_result.status; + result.failed_state = orth_result.failed_state; + result.max_orthonormality_deviation = orth_result.max_deviation; + return result; + } + + psi = psi_trial; + result.status = WfcExtrapStatus::Success; + result.failed_state = -1; + result.max_orthonormality_deviation = orth_result.max_deviation; + return result; + } +} + template void WfHistoryLCAO::prune_history() { diff --git a/source/source_lcao/module_extrap/wf_history_lcao.h b/source/source_lcao/module_extrap/wf_history_lcao.h index 1e7799619aa..fbf39337dcf 100644 --- a/source/source_lcao/module_extrap/wf_history_lcao.h +++ b/source/source_lcao/module_extrap/wf_history_lcao.h @@ -10,6 +10,19 @@ namespace ModuleExtrap { +struct WfExtrapApplyResult +{ + WfcExtrapStatus status = WfcExtrapStatus::Disabled; + double max_orthonormality_deviation = 0.0; + int failed_state = -1; + int snapshot_istep = -1; + + bool ok() const noexcept + { + return this->status == WfcExtrapStatus::Success; + } +}; + template class WfHistoryLCAO { @@ -33,6 +46,25 @@ class WfHistoryLCAO // pointer/reference whose lifetime would be invalidated by the next history update. bool latest_snapshot(WfSnapshotLCAO& snapshot) const; + /** + * Restore the latest Gamma-only NAO wavefunction and reorthonormalize it with + * the current overlap matrix. + * + * This method only updates the Psi object after the full restore and + * reorthonormalization path succeeds. DMK/DMR/rho rebuilding is deliberately + * left to the ESolver integration layer, where the existing density-matrix code + * can be called with the same control flow as the current initialization path. + * + * The first PR only supports the real Gamma-only path. Multi-k support requires + * complex WFN snapshots, per-k S(k), and phase/convention handling, and should be + * added in a separate path instead of silently reusing this helper. + */ + WfExtrapApplyResult try_use_prev_wf_gamma(const double* current_overlap, + psi::Psi& psi, + const ModuleBase::matrix& wg_now, + double pivot_threshold = 1.0e-14, + double check_tolerance = 1.0e-8); + private: void prune_history(); From 4469729c9b2e075f6158f382a4d979697c83f032 Mon Sep 17 00:00:00 2001 From: Growl Date: Sun, 31 May 2026 02:17:40 +0800 Subject: [PATCH 04/10] Wire NAO WFN history into SCF initialization --- source/source_esolver/esolver_ks_lcao.cpp | 67 +++++++++++++++++++++-- source/source_esolver/esolver_ks_lcao.h | 11 ++++ source/source_lcao/CMakeLists.txt | 2 + 3 files changed, 75 insertions(+), 5 deletions(-) diff --git a/source/source_esolver/esolver_ks_lcao.cpp b/source/source_esolver/esolver_ks_lcao.cpp index fc0ecddc8a2..a328c22f52d 100644 --- a/source/source_esolver/esolver_ks_lcao.cpp +++ b/source/source_esolver/esolver_ks_lcao.cpp @@ -16,8 +16,10 @@ #include "../source_lcao/module_ri/exx_opt_orb.h" #endif #include "source_lcao/module_rdmft/rdmft.h" +#include "source_lcao/module_extrap/wf_history_lcao.h" #include "source_estate/module_charge/chgmixing.h" // use charge mixing, mohan add 20251006 #include "source_estate/module_dm/init_dm.h" // init dm from electronic wave functions +#include "source_estate/module_dm/cal_dm_psi.h" // rebuild DMK from extrapolated WFN #include "source_io/module_ctrl/ctrl_runner_lcao.h" // use ctrl_runner_lcao() #include "source_io/module_ctrl/ctrl_iter_lcao.h" // use ctrl_iter_lcao() #include "source_io/module_ctrl/ctrl_scf_lcao.h" // use ctrl_scf_lcao() @@ -26,6 +28,8 @@ #include "source_lcao/LCAO_set.h" // mohan add 20251111 #include "source_psi/setup_psi.h" // use Setup_Psi for deallocate_psi +#include + namespace ModuleESolver { @@ -82,6 +86,14 @@ void ESolver_KS_LCAO::before_all_runners(UnitCell& ucell, const Input_pa LCAO_domain::set_psi_occ_dm_chg(this->kv, this->psi, this->pv, this->pelec, this->dmat, this->chr, inp); + const auto wfc_extrap_method = ModuleExtrap::wfc_extrap_method_from_string(inp.wfc_extrap); + if (wfc_extrap_method != ModuleExtrap::WfcExtrapMethod::None) + { + this->wf_history_lcao_ = std::make_unique>(wfc_extrap_method); + GlobalV::ofs_running << " WFN extrapolation method: " + << ModuleExtrap::to_string(wfc_extrap_method) << std::endl; + } + LCAO_domain::set_pot(ucell, this->kv, this->sf, *this->pw_rho, *this->pw_rhod, this->pelec, this->orb_, this->pv, this->locpp, this->dftu, this->solvent, this->exx_nao, this->deepks, inp); @@ -175,6 +187,8 @@ void ESolver_KS_LCAO::before_scf(UnitCell& ucell, const int istep) } this->dmat.dm->init_DMR(*hamilt_lcao->getHR()); + bool initialized_by_wfc_extrap = false; + // 13.1) decide the strategy for initializing DMR and HR if(istep == 0)//if the first scf step, readin DMR from file, { @@ -193,12 +207,50 @@ void ESolver_KS_LCAO::before_scf(UnitCell& ucell, const int istep) this->chr, PARAM.inp.ks_solver); } } - else if(PARAM.inp.esolver_type!="tddft")//if not, use the DMR calculated from last step + else if(PARAM.inp.esolver_type!="tddft")//if not, initialize DMR from WFN history or last-step DMK { - // 13.1.2) two cases are considered: - // 1. DMK in DensityMatrix is not empty (istep > 0), then DMR is initialized by DMK - // 2. DMK in DensityMatrix is empty (istep == 0), then DMR is initialized by zeros - this->dmat.dm->cal_DMR(); + if constexpr (std::is_same::value) + { + if (this->wf_history_lcao_ != nullptr) + { + // Refresh the current Gamma-only overlap matrix before reusing the + // previous WFN. The restored WFN is accepted only after it satisfies + // C^T S_now C = I in ModuleExtrap::try_use_prev_wf_gamma(). + hamilt_lcao->updateSk(0, 0); + const ModuleExtrap::WfExtrapApplyResult wfc_result + = this->wf_history_lcao_->try_use_prev_wf_gamma(hamilt_lcao->getSk(), + *(this->psi), + this->pelec->wg); + if (wfc_result.ok()) + { + elecstate::cal_dm_psi(this->dmat.dm->get_paraV_pointer(), + this->pelec->wg, + *(this->psi), + *(this->dmat.dm)); + this->dmat.dm->cal_DMR(); + LCAO_domain::dm2rho(this->dmat.dm->get_DMR_vector(), PARAM.inp.nspin, &this->chr); + initialized_by_wfc_extrap = true; + + GlobalV::ofs_running << " WFN extrapolation: use_prev_wf from ionic step " + << wfc_result.snapshot_istep + << ", max |C^T S C - I| = " + << wfc_result.max_orthonormality_deviation << std::endl; + } + else if (wfc_result.status != ModuleExtrap::WfcExtrapStatus::EmptyHistory) + { + GlobalV::ofs_running << " WFN extrapolation: fallback to density-matrix initialization (" + << ModuleExtrap::to_string(wfc_result.status) << ")" << std::endl; + } + } + } + + if (!initialized_by_wfc_extrap) + { + // 13.1.2) two cases are considered: + // 1. DMK in DensityMatrix is not empty (istep > 0), then DMR is initialized by DMK + // 2. DMK in DensityMatrix is empty (istep == 0), then DMR is initialized by zeros + this->dmat.dm->cal_DMR(); + } } // 13.2) init_scf, should be before_scf? mohan add 2025-03-10 elecstate::init_scf(ucell, this->Pgrid, this->sf.strucFac, this->locpp.numeric, @@ -521,6 +573,11 @@ void ESolver_KS_LCAO::after_scf(UnitCell& ucell, const int istep, const this->rdmft_solver, this->deepks, this->exx_nao, this->conv_esolver, this->scf_nmax_flag, istep); + if (conv_esolver && this->wf_history_lcao_ != nullptr && this->psi != nullptr) + { + this->wf_history_lcao_->update_after_scf(istep, *(this->psi), this->pelec->wg); + } + //! 3) Clean up RA, which is used to serach for adjacent atoms if (!PARAM.inp.cal_force && !PARAM.inp.cal_stress) { diff --git a/source/source_esolver/esolver_ks_lcao.h b/source/source_esolver/esolver_ks_lcao.h index bfd80e50fe3..dd1aa78b1aa 100644 --- a/source/source_esolver/esolver_ks_lcao.h +++ b/source/source_esolver/esolver_ks_lcao.h @@ -22,6 +22,12 @@ template class ESolver_LR; } +namespace ModuleExtrap +{ +template +class WfHistoryLCAO; +} + //----------------------------------- // ESolver for LCAO //----------------------------------- @@ -82,6 +88,11 @@ class ESolver_KS_LCAO : public ESolver_KS //! Add density matrix class, mohan add 2025-10-30 LCAO_domain::Setup_DM dmat; + //! Optional NAO wavefunction-history extrapolation state. + //! The object owns only copied wavefunction snapshots; transient objects such as + //! Psi, HamiltLCAO, DensityMatrix, and Charge are passed in only when needed. + std::unique_ptr> wf_history_lcao_; + // For deepks method, mohan add 2025-10-08 Setup_DeePKS deepks; diff --git a/source/source_lcao/CMakeLists.txt b/source/source_lcao/CMakeLists.txt index 480eed0b476..83777751591 100644 --- a/source/source_lcao/CMakeLists.txt +++ b/source/source_lcao/CMakeLists.txt @@ -24,6 +24,8 @@ if(ENABLE_LCAO) module_operator_lcao/dspin_lcao.cpp module_operator_lcao/dftu_lcao.cpp module_operator_lcao/operator_force_stress_utils.cpp + module_extrap/wf_history_lcao.cpp + module_extrap/wf_orthonormalize_lcao.cpp dftu_lcao.cpp pulay_fs_center2.cpp FORCE_STRESS.cpp From b4c6239332386b5ea85a8415256f0ac7a9c4912d Mon Sep 17 00:00:00 2001 From: Growl Date: Sun, 31 May 2026 03:34:32 +0800 Subject: [PATCH 05/10] Debugging & bug fixes --- source/source_esolver/esolver_ks_lcao.cpp | 51 ++++- .../module_extrap/wf_history_lcao.cpp | 16 ++ .../module_extrap/wf_history_lcao.h | 12 ++ .../module_extrap/wf_orthonormalize_lcao.cpp | 196 ++++++++++++------ .../module_extrap/wf_orthonormalize_lcao.h | 22 ++ 5 files changed, 227 insertions(+), 70 deletions(-) diff --git a/source/source_esolver/esolver_ks_lcao.cpp b/source/source_esolver/esolver_ks_lcao.cpp index a328c22f52d..0e79e0220cd 100644 --- a/source/source_esolver/esolver_ks_lcao.cpp +++ b/source/source_esolver/esolver_ks_lcao.cpp @@ -17,6 +17,7 @@ #endif #include "source_lcao/module_rdmft/rdmft.h" #include "source_lcao/module_extrap/wf_history_lcao.h" +#include "source_lcao/module_operator_lcao/operator_lcao.h" // build overlap SR before WFN reorthonormalization #include "source_estate/module_charge/chgmixing.h" // use charge mixing, mohan add 20251006 #include "source_estate/module_dm/init_dm.h" // init dm from electronic wave functions #include "source_estate/module_dm/cal_dm_psi.h" // rebuild DMK from extrapolated WFN @@ -28,6 +29,9 @@ #include "source_lcao/LCAO_set.h" // mohan add 20251111 #include "source_psi/setup_psi.h" // use Setup_Psi for deallocate_psi +#include +#include +#include #include namespace ModuleESolver @@ -207,15 +211,23 @@ void ESolver_KS_LCAO::before_scf(UnitCell& ucell, const int istep) this->chr, PARAM.inp.ks_solver); } } - else if(PARAM.inp.esolver_type!="tddft")//if not, initialize DMR from WFN history or last-step DMK + else if(PARAM.inp.esolver_type!="tddft")//initialize DMR from WFN history if required { if constexpr (std::is_same::value) { if (this->wf_history_lcao_ != nullptr) { - // Refresh the current Gamma-only overlap matrix before reusing the - // previous WFN. The restored WFN is accepted only after it satisfies - // C^T S_now C = I in ModuleExtrap::try_use_prev_wf_gamma(). + // The newly-created HamiltLCAO has allocated S(R), but the actual + // overlap values are normally filled later by updateHk() inside the + // eigensolver. WFN extrapolation needs S before entering SCF, so build + // only the overlap real-space matrix here and then fold it to S(Gamma). + auto* lcao_op = dynamic_cast*>(hamilt_lcao->getOperator()); + if (lcao_op == nullptr) + { + ModuleBase::WARNING_QUIT("ESolver_KS_LCAO::before_scf", + "Failed to access the LCAO operator chain for WFN extrapolation."); + } + lcao_op->contributeHR(); hamilt_lcao->updateSk(0, 0); const ModuleExtrap::WfExtrapApplyResult wfc_result = this->wf_history_lcao_->try_use_prev_wf_gamma(hamilt_lcao->getSk(), @@ -236,13 +248,38 @@ void ESolver_KS_LCAO::before_scf(UnitCell& ucell, const int istep) << ", max |C^T S C - I| = " << wfc_result.max_orthonormality_deviation << std::endl; } - else if (wfc_result.status != ModuleExtrap::WfcExtrapStatus::EmptyHistory) + else { - GlobalV::ofs_running << " WFN extrapolation: fallback to density-matrix initialization (" - << ModuleExtrap::to_string(wfc_result.status) << ")" << std::endl; + std::ostringstream oss; + oss << std::scientific << std::setprecision(16); + oss << "WFN extrapolation failed with status: " + << ModuleExtrap::to_string(wfc_result.status) << "\n\n" + << " snapshot_istep=" << wfc_result.snapshot_istep << "\n" + << " failed_state=" << wfc_result.failed_state << "\n" + << " failed_pivot_index=" << wfc_result.failed_pivot_index << "\n" + << " failed_pivot=" << wfc_result.failed_pivot << "\n" + << " nstate=" << wfc_result.nstate << "\n" + << " nbands=" << wfc_result.nbands << "\n" + << " nbasis=" << wfc_result.nbasis << "\n" + << " min_metric_diag=" << wfc_result.min_metric_diag << "\n" + << " max_metric_diag=" << wfc_result.max_metric_diag << "\n" + << " max_metric_abs=" << wfc_result.max_metric_abs << "\n" + << " max_metric_asymmetry=" << wfc_result.max_metric_asymmetry << "\n" + << " max_orthonormality_deviation=" + << wfc_result.max_orthonormality_deviation << "\n"; + const std::string message = oss.str(); + ModuleBase::WARNING_QUIT("ESolver_KS_LCAO::before_scf", message); } } } + else + { + if (this->wf_history_lcao_ != nullptr) + { + ModuleBase::WARNING_QUIT("ESolver_KS_LCAO::before_scf", + "WFN extrapolation is currently supported only for the real Gamma-only NAO path."); + } + } if (!initialized_by_wfc_extrap) { diff --git a/source/source_lcao/module_extrap/wf_history_lcao.cpp b/source/source_lcao/module_extrap/wf_history_lcao.cpp index 1fb457a1748..cca040619e0 100644 --- a/source/source_lcao/module_extrap/wf_history_lcao.cpp +++ b/source/source_lcao/module_extrap/wf_history_lcao.cpp @@ -149,12 +149,24 @@ WfExtrapApplyResult WfHistoryLCAO::try_use_prev_wf_gamma(const double* curre const WfOrthonormalizeResult orth_result = reorthonormalize_gamma_lcao(current_overlap, psi_trial, + snapshot.wg, + 1.0e-12, pivot_threshold, check_tolerance); if (!orth_result.ok()) { result.status = orth_result.status; result.failed_state = orth_result.failed_state; + result.failed_pivot_index = orth_result.failed_pivot_index; + result.failed_pivot = orth_result.failed_pivot; + result.min_metric_diag = orth_result.min_metric_diag; + result.max_metric_diag = orth_result.max_metric_diag; + result.max_metric_abs = orth_result.max_metric_abs; + result.max_metric_asymmetry = orth_result.max_metric_asymmetry; + result.nstate = orth_result.nstate; + result.nbands = orth_result.nbands; + result.nbasis = orth_result.nbasis; + result.nactive_bands = orth_result.nactive_bands; result.max_orthonormality_deviation = orth_result.max_deviation; return result; } @@ -162,6 +174,10 @@ WfExtrapApplyResult WfHistoryLCAO::try_use_prev_wf_gamma(const double* curre psi = psi_trial; result.status = WfcExtrapStatus::Success; result.failed_state = -1; + result.nstate = orth_result.nstate; + result.nbands = orth_result.nbands; + result.nbasis = orth_result.nbasis; + result.nactive_bands = orth_result.nactive_bands; result.max_orthonormality_deviation = orth_result.max_deviation; return result; } diff --git a/source/source_lcao/module_extrap/wf_history_lcao.h b/source/source_lcao/module_extrap/wf_history_lcao.h index fbf39337dcf..5a739e7d292 100644 --- a/source/source_lcao/module_extrap/wf_history_lcao.h +++ b/source/source_lcao/module_extrap/wf_history_lcao.h @@ -14,7 +14,19 @@ struct WfExtrapApplyResult { WfcExtrapStatus status = WfcExtrapStatus::Disabled; double max_orthonormality_deviation = 0.0; + + // Propagated diagnostics from WfOrthonormalizeResult. + double failed_pivot = 0.0; + double min_metric_diag = 0.0; + double max_metric_diag = 0.0; + double max_metric_abs = 0.0; + double max_metric_asymmetry = 0.0; int failed_state = -1; + int failed_pivot_index = -1; + int nstate = 0; + int nbands = 0; + int nbasis = 0; + int nactive_bands = 0; int snapshot_istep = -1; bool ok() const noexcept diff --git a/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp b/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp index 4b590387342..859b0034284 100644 --- a/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp +++ b/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp @@ -1,7 +1,5 @@ #include "source_lcao/module_extrap/wf_orthonormalize_lcao.h" -#include "source_base/module_external/blas_connector.h" - #include #include #include @@ -16,61 +14,104 @@ inline std::size_t idx(const int row, const int col, const int ncol) noexcept return static_cast(row) * static_cast(ncol) + static_cast(col); } +int active_bands_for_state(const ModuleBase::matrix& occupations, + const int istate, + const int nbands, + const double occupation_threshold) +{ + if (occupations.c == nullptr || occupations.nr <= istate || occupations.nc <= 0) + { + return nbands; + } + + // In ABACUS, Psi may contain more local eigenvectors than the occupation + // matrix explicitly stores. This is already supported by cal_dm_psi(), + // which simply ignores local bands without a corresponding global weight. + const int nband_with_occupation = std::min(nbands, occupations.nc); + int nactive = 0; + for (int ib = 0; ib < nband_with_occupation; ++ib) + { + if (std::abs(occupations(istate, ib)) > occupation_threshold) + { + nactive = ib + 1; + } + } + return nactive; +} + void build_overlap_in_band_space(const double* overlap, const double* coeff, - const int nbands, + const int nactive_bands, const int nbasis, std::vector& metric) { - metric.assign(static_cast(nbands) * static_cast(nbands), 0.0); - std::vector coeff_s(static_cast(nbands) * static_cast(nbasis), 0.0); - - const double one = 1.0; - const double zero = 0.0; - - // Coefficients are stored as a row-major band-by-basis matrix X. With the - // current row-major overlap S, the band-space metric is O = X S X^T. - BlasConnector::gemm('N', - 'N', - nbands, - nbasis, - nbasis, - one, - coeff, - nbasis, - overlap, - nbasis, - zero, - coeff_s.data(), - nbasis); - - BlasConnector::gemm('N', - 'T', - nbands, - nbands, - nbasis, - one, - coeff_s.data(), - nbasis, - coeff, - nbasis, - zero, - metric.data(), - nbands); - - // Remove tiny asymmetry introduced by BLAS roundoff before the Cholesky step. - for (int i = 0; i < nbands; ++i) + metric.assign(static_cast(nactive_bands) * static_cast(nactive_bands), 0.0); + std::vector coeff_s(static_cast(nactive_bands) * static_cast(nbasis), 0.0); + + // Coefficients are stored as a row-major band-by-basis matrix C. Build the + // band-space metric explicitly as O = C S C^T to avoid ambiguity about the + // row-major layout when using Fortran BLAS wrappers. + for (int ib = 0; ib < nactive_bands; ++ib) + { + for (int mu = 0; mu < nbasis; ++mu) + { + double value = 0.0; + for (int nu = 0; nu < nbasis; ++nu) + { + value += coeff[idx(ib, nu, nbasis)] * overlap[idx(nu, mu, nbasis)]; + } + coeff_s[idx(ib, mu, nbasis)] = value; + } + } + + for (int ib = 0; ib < nactive_bands; ++ib) + { + for (int jb = 0; jb <= ib; ++jb) + { + double value = 0.0; + for (int mu = 0; mu < nbasis; ++mu) + { + value += coeff_s[idx(ib, mu, nbasis)] * coeff[idx(jb, mu, nbasis)]; + } + metric[idx(ib, jb, nactive_bands)] = value; + metric[idx(jb, ib, nactive_bands)] = value; + } + } +} + +void fill_metric_diagnostics(const std::vector& metric, + const int n, + WfOrthonormalizeResult& result) +{ + if (n <= 0 || metric.empty()) + { + return; + } + + result.min_metric_diag = metric[idx(0, 0, n)]; + result.max_metric_diag = metric[idx(0, 0, n)]; + result.max_metric_abs = 0.0; + result.max_metric_asymmetry = 0.0; + + for (int i = 0; i < n; ++i) { - for (int j = 0; j < i; ++j) + const double diag = metric[idx(i, i, n)]; + result.min_metric_diag = std::min(result.min_metric_diag, diag); + result.max_metric_diag = std::max(result.max_metric_diag, diag); + for (int j = 0; j < n; ++j) { - const double value = 0.5 * (metric[idx(i, j, nbands)] + metric[idx(j, i, nbands)]); - metric[idx(i, j, nbands)] = value; - metric[idx(j, i, nbands)] = value; + result.max_metric_abs = std::max(result.max_metric_abs, std::abs(metric[idx(i, j, n)])); + result.max_metric_asymmetry = std::max(result.max_metric_asymmetry, + std::abs(metric[idx(i, j, n)] - metric[idx(j, i, n)])); } } } -bool cholesky_lower_in_place(std::vector& metric, const int n, const double pivot_threshold) +bool cholesky_lower_in_place(std::vector& metric, + const int n, + const double pivot_threshold, + WfOrthonormalizeResult& result) + { for (int j = 0; j < n; ++j) { @@ -83,6 +124,8 @@ bool cholesky_lower_in_place(std::vector& metric, const int n, const dou if (!(diag > pivot_threshold) || !std::isfinite(diag)) { + result.failed_pivot_index = j; + result.failed_pivot = diag; return false; } @@ -109,25 +152,27 @@ bool cholesky_lower_in_place(std::vector& metric, const int n, const dou void apply_inverse_cholesky_left(const std::vector& lower, double* coeff, - const int nbands, + const int nactive_bands, const int nbasis) { - std::vector column(static_cast(nbands), 0.0); + std::vector column(static_cast(nactive_bands), 0.0); - // Solve L * C_new(:,mu) = C_old(:,mu) for every basis function mu. + // Solve L * C_new(:,mu) = C_old(:,mu) for every basis function mu. Only + // occupied/fractionally occupied bands are transformed; unoccupied bands do + // not contribute to the density matrix and are left unchanged. for (int mu = 0; mu < nbasis; ++mu) { - for (int ib = 0; ib < nbands; ++ib) + for (int ib = 0; ib < nactive_bands; ++ib) { double value = coeff[idx(ib, mu, nbasis)]; for (int jb = 0; jb < ib; ++jb) { - value -= lower[idx(ib, jb, nbands)] * column[jb]; + value -= lower[idx(ib, jb, nactive_bands)] * column[jb]; } - column[ib] = value / lower[idx(ib, ib, nbands)]; + column[ib] = value / lower[idx(ib, ib, nactive_bands)]; } - for (int ib = 0; ib < nbands; ++ib) + for (int ib = 0; ib < nactive_bands; ++ib) { coeff[idx(ib, mu, nbasis)] = column[ib]; } @@ -136,19 +181,19 @@ void apply_inverse_cholesky_left(const std::vector& lower, double max_orthonormality_deviation(const double* overlap, const double* coeff, - const int nbands, + const int nactive_bands, const int nbasis) { std::vector metric; - build_overlap_in_band_space(overlap, coeff, nbands, nbasis, metric); + build_overlap_in_band_space(overlap, coeff, nactive_bands, nbasis, metric); double max_dev = 0.0; - for (int i = 0; i < nbands; ++i) + for (int i = 0; i < nactive_bands; ++i) { - for (int j = 0; j < nbands; ++j) + for (int j = 0; j < nactive_bands; ++j) { const double expected = (i == j) ? 1.0 : 0.0; - max_dev = std::max(max_dev, std::abs(metric[idx(i, j, nbands)] - expected)); + max_dev = std::max(max_dev, std::abs(metric[idx(i, j, nactive_bands)] - expected)); } } return max_dev; @@ -158,12 +203,14 @@ double max_orthonormality_deviation(const double* overlap, WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, psi::Psi& wfc, + const ModuleBase::matrix& occupations, + const double occupation_threshold, const double pivot_threshold, const double check_tolerance) { WfOrthonormalizeResult result; - if (overlap == nullptr || !(pivot_threshold >= 0.0) || !(check_tolerance > 0.0)) + if (overlap == nullptr || !(occupation_threshold >= 0.0) || !(pivot_threshold >= 0.0) || !(check_tolerance > 0.0)) { result.status = WfcExtrapStatus::InvalidInput; return result; @@ -172,6 +219,9 @@ WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, const int nstate = wfc.get_nk(); const int nbands = wfc.get_nbands(); const int nbasis = wfc.get_nbasis(); + result.nstate = nstate; + result.nbands = nbands; + result.nbasis = nbasis; if (nstate <= 0 || nbands <= 0 || nbasis <= 0 || nbands > nbasis) { @@ -185,6 +235,12 @@ WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, return result; } + if (occupations.c == nullptr || occupations.nr < nstate || occupations.nc <= 0) + { + result.status = WfcExtrapStatus::DimensionMismatch; + return result; + } + // Operate on a full owned copy first. The caller's Psi is overwritten only after // all state/spin channels pass the positive-definiteness and residual checks. std::vector trial(wfc.size(), 0.0); @@ -192,25 +248,38 @@ WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, std::copy(coeff_begin, coeff_begin + wfc.size(), trial.begin()); double max_deviation_all = 0.0; + int max_active_bands = 0; for (int istate = 0; istate < nstate; ++istate) { double* coeff = trial.data() + static_cast(istate) * static_cast(nbands) * static_cast(nbasis); + const int nactive_bands = active_bands_for_state(occupations, istate, nbands, occupation_threshold); + max_active_bands = std::max(max_active_bands, nactive_bands); + result.nactive_bands = nactive_bands; + + // Empty spin channels may occur in edge cases. They do not contribute to + // the density matrix, so there is no occupied subspace to reorthonormalize. + if (nactive_bands == 0) + { + continue; + } + std::vector metric; - build_overlap_in_band_space(overlap, coeff, nbands, nbasis, metric); + build_overlap_in_band_space(overlap, coeff, nactive_bands, nbasis, metric); + fill_metric_diagnostics(metric, nactive_bands, result); - if (!cholesky_lower_in_place(metric, nbands, pivot_threshold)) + if (!cholesky_lower_in_place(metric, nactive_bands, pivot_threshold, result)) { result.status = WfcExtrapStatus::OrthogonalizationFailed; result.failed_state = istate; return result; } - apply_inverse_cholesky_left(metric, coeff, nbands, nbasis); + apply_inverse_cholesky_left(metric, coeff, nactive_bands, nbasis); - const double max_dev = max_orthonormality_deviation(overlap, coeff, nbands, nbasis); + const double max_dev = max_orthonormality_deviation(overlap, coeff, nactive_bands, nbasis); if (!std::isfinite(max_dev) || max_dev > check_tolerance) { result.status = WfcExtrapStatus::OrthogonalizationFailed; @@ -223,6 +292,7 @@ WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, wfc.set_all_psi(trial.data(), trial.size()); result.status = WfcExtrapStatus::Success; + result.nactive_bands = max_active_bands; result.max_deviation = max_deviation_all; return result; } diff --git a/source/source_lcao/module_extrap/wf_orthonormalize_lcao.h b/source/source_lcao/module_extrap/wf_orthonormalize_lcao.h index 25842a8d5f7..ab3bf382292 100644 --- a/source/source_lcao/module_extrap/wf_orthonormalize_lcao.h +++ b/source/source_lcao/module_extrap/wf_orthonormalize_lcao.h @@ -2,6 +2,7 @@ #define SOURCE_LCAO_MODULE_EXTRAP_WF_ORTHONORMALIZE_LCAO_H #include "source_lcao/module_extrap/wf_extrap_method.h" +#include "source_base/matrix.h" #include "source_psi/psi.h" namespace ModuleExtrap @@ -11,7 +12,21 @@ struct WfOrthonormalizeResult { WfcExtrapStatus status = WfcExtrapStatus::Success; double max_deviation = 0.0; + + // Diagnostics for orthogonalization failures. These values are kept in the + // result object so that the ESolver layer can print them without exposing + // implementation details of the orthonormalization helper. + double failed_pivot = 0.0; + double min_metric_diag = 0.0; + double max_metric_diag = 0.0; + double max_metric_abs = 0.0; + double max_metric_asymmetry = 0.0; int failed_state = -1; + int failed_pivot_index = -1; + int nstate = 0; + int nbands = 0; + int nbasis = 0; + int nactive_bands = 0; bool ok() const noexcept { @@ -35,12 +50,19 @@ struct WfOrthonormalizeResult * k-points. This routine therefore treats the first Psi dimension as a generic * state/channel index. * + * Only bands with non-negligible occupation are reorthonormalized. For the + * initial-density use case of use_prev_wf, completely unoccupied bands do not + * contribute to DMK/DMR/rho and orthonormalizing them can make the metric nearly + * singular when all basis-sized bands are present. + * * This is a dense serial utility for the first use_prev_wf implementation. The * distributed/k-point path should be implemented separately instead of silently * reusing this routine for block-cyclic matrices. */ WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, psi::Psi& wfc, + const ModuleBase::matrix& occupations, + double occupation_threshold = 1.0e-12, double pivot_threshold = 1.0e-14, double check_tolerance = 1.0e-8); From 21c04d642e24c17251479384469142209ac533fe Mon Sep 17 00:00:00 2001 From: Growl Date: Sat, 6 Jun 2026 13:59:53 +0800 Subject: [PATCH 06/10] Drop support for C++11 standard --- CMakeLists.txt | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f3f511331f..88b2e97424e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -227,8 +227,11 @@ set(ABACUS_BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${ABACUS_BIN_NAME}) include_directories(${ABACUS_SOURCE_DIR}) include_directories(${ABACUS_SOURCE_DIR}/source_base/module_container) +set(ABACUS_MIN_CXX_STANDARD 14) if(NOT DEFINED CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD ${ABACUS_MIN_CXX_STANDARD}) +elseif(CMAKE_CXX_STANDARD LESS ABACUS_MIN_CXX_STANDARD) + message(FATAL_ERROR "ABACUS requires C++${ABACUS_MIN_CXX_STANDARD} or newer.") endif() set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -331,7 +334,6 @@ if(ENABLE_LCAO) target_link_libraries(${ABACUS_BIN_NAME} ${PEXSI_LIBRARY} ${SuperLU_DIST_LIBRARY} ${ParMETIS_LIBRARY} ${METIS_LIBRARY} pexsi) include_directories(${PEXSI_INCLUDE_DIR} ${ParMETIS_INCLUDE_DIR}) add_compile_definitions(__PEXSI) - set(CMAKE_CXX_STANDARD 14) endif() else() set(ENABLE_MLALGO OFF) @@ -452,7 +454,6 @@ if(USE_CUDA) # Always find CUDAToolkit to get CUDAToolkit_VERSION find_package(CUDAToolkit REQUIRED) - set_if_higher(CMAKE_CXX_STANDARD 14) if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "13.0") message(STATUS "CUDA ${CUDAToolkit_VERSION} detected. Setting CMAKE_CUDA_STANDARD to 17.") set_if_higher(CMAKE_CXX_STANDARD 17) @@ -687,8 +688,6 @@ if(ENABLE_MLALGO OR DEFINED Torch_DIR) find_package(Torch REQUIRED) if(NOT Torch_VERSION VERSION_LESS "2.1.0") set_if_higher(CMAKE_CXX_STANDARD 17) - elseif(NOT Torch_VERSION VERSION_LESS "1.5.0") - set_if_higher(CMAKE_CXX_STANDARD 14) endif() include_directories(${TORCH_INCLUDE_DIRS}) if(MKL_FOUND) @@ -750,7 +749,6 @@ if(DEFINED LIBRI_DIR) set(ENABLE_LIBRI ON) endif() if(ENABLE_LIBRI) - set_if_higher(CMAKE_CXX_STANDARD 14) if(LIBRI_DIR) else() find_package(LibRI REQUIRED) From bcc15614b8ee37e9c200f60e20791aac6a8f6588 Mon Sep 17 00:00:00 2001 From: Growl Date: Sat, 6 Jun 2026 15:15:20 +0800 Subject: [PATCH 07/10] Fix MPI parrallel problems --- source/source_esolver/esolver_ks_lcao.cpp | 17 +- .../module_extrap/wf_history_lcao.cpp | 2 + .../module_extrap/wf_history_lcao.h | 2 + .../module_extrap/wf_orthonormalize_lcao.cpp | 204 ++++++++++++++++-- .../module_extrap/wf_orthonormalize_lcao.h | 11 +- 5 files changed, 220 insertions(+), 16 deletions(-) diff --git a/source/source_esolver/esolver_ks_lcao.cpp b/source/source_esolver/esolver_ks_lcao.cpp index 0e79e0220cd..cf254592f6d 100644 --- a/source/source_esolver/esolver_ks_lcao.cpp +++ b/source/source_esolver/esolver_ks_lcao.cpp @@ -1,4 +1,5 @@ #include "esolver_ks_lcao.h" +#include "source_base/global_function.h" #include "source_base/module_external/blacs_connector.h" #include "source_cell/module_neighbor/sltk_atom_arrange.h" #include "source_estate/elecstate_tools.h" @@ -227,24 +228,38 @@ void ESolver_KS_LCAO::before_scf(UnitCell& ucell, const int istep) ModuleBase::WARNING_QUIT("ESolver_KS_LCAO::before_scf", "Failed to access the LCAO operator chain for WFN extrapolation."); } + ModuleBase::timer::start("WFN_Extrap", "prepare_overlap"); lcao_op->contributeHR(); - hamilt_lcao->updateSk(0, 0); + // Refresh the current Gamma-only overlap matrix before reusing the previous WFN. + // The layout must follow the active LCAO solver because the WFN orthonormalizer + // assembles distributed local blocks using the same column/row-major convention. + const int sk_layout = + ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(PARAM.inp.ks_solver) ? 1 : 0; + hamilt_lcao->updateSk(0, sk_layout); + ModuleBase::timer::end("WFN_Extrap", "prepare_overlap"); + + ModuleBase::timer::start("WFN_Extrap", "apply"); const ModuleExtrap::WfExtrapApplyResult wfc_result = this->wf_history_lcao_->try_use_prev_wf_gamma(hamilt_lcao->getSk(), + this->pv, *(this->psi), this->pelec->wg); + ModuleBase::timer::end("WFN_Extrap", "apply"); if (wfc_result.ok()) { + ModuleBase::timer::start("WFN_Extrap", "rebuild_density"); elecstate::cal_dm_psi(this->dmat.dm->get_paraV_pointer(), this->pelec->wg, *(this->psi), *(this->dmat.dm)); this->dmat.dm->cal_DMR(); LCAO_domain::dm2rho(this->dmat.dm->get_DMR_vector(), PARAM.inp.nspin, &this->chr); + ModuleBase::timer::end("WFN_Extrap", "rebuild_density"); initialized_by_wfc_extrap = true; GlobalV::ofs_running << " WFN extrapolation: use_prev_wf from ionic step " << wfc_result.snapshot_istep + << ", active bands = " << wfc_result.nactive_bands << ", max |C^T S C - I| = " << wfc_result.max_orthonormality_deviation << std::endl; } diff --git a/source/source_lcao/module_extrap/wf_history_lcao.cpp b/source/source_lcao/module_extrap/wf_history_lcao.cpp index cca040619e0..dd0b87516c5 100644 --- a/source/source_lcao/module_extrap/wf_history_lcao.cpp +++ b/source/source_lcao/module_extrap/wf_history_lcao.cpp @@ -89,6 +89,7 @@ bool WfHistoryLCAO::latest_snapshot(WfSnapshotLCAO& snapshot) const template WfExtrapApplyResult WfHistoryLCAO::try_use_prev_wf_gamma(const double* current_overlap, + const Parallel_Orbitals& pv, psi::Psi& psi, const ModuleBase::matrix& wg_now, const double pivot_threshold, @@ -148,6 +149,7 @@ WfExtrapApplyResult WfHistoryLCAO::try_use_prev_wf_gamma(const double* curre } const WfOrthonormalizeResult orth_result = reorthonormalize_gamma_lcao(current_overlap, + pv, psi_trial, snapshot.wg, 1.0e-12, diff --git a/source/source_lcao/module_extrap/wf_history_lcao.h b/source/source_lcao/module_extrap/wf_history_lcao.h index 5a739e7d292..d1da4e473f0 100644 --- a/source/source_lcao/module_extrap/wf_history_lcao.h +++ b/source/source_lcao/module_extrap/wf_history_lcao.h @@ -3,6 +3,7 @@ #include "source_lcao/module_extrap/wf_extrap_method.h" #include "source_lcao/module_extrap/wf_snapshot_lcao.h" +#include "source_basis/module_ao/parallel_orbitals.h" #include #include @@ -72,6 +73,7 @@ class WfHistoryLCAO * added in a separate path instead of silently reusing this helper. */ WfExtrapApplyResult try_use_prev_wf_gamma(const double* current_overlap, + const Parallel_Orbitals& pv, psi::Psi& psi, const ModuleBase::matrix& wg_now, double pivot_threshold = 1.0e-14, diff --git a/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp b/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp index 859b0034284..2882351bcaf 100644 --- a/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp +++ b/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp @@ -2,8 +2,13 @@ #include #include +#include #include +#ifdef __MPI +#include +#endif + namespace ModuleExtrap { namespace @@ -199,9 +204,144 @@ double max_orthonormality_deviation(const double* overlap, return max_dev; } + +WfcExtrapStatus allreduce_dense_vector(std::vector& values, const Parallel_Orbitals& pv) +{ +#ifdef __MPI + if (!pv.is_serial) + { + const MPI_Comm comm = pv.comm(); + if (comm == MPI_COMM_NULL) + { + return WfcExtrapStatus::InvalidInput; + } + if (values.size() > static_cast(std::numeric_limits::max())) + { + return WfcExtrapStatus::InvalidInput; + } + MPI_Allreduce(MPI_IN_PLACE, + values.data(), + static_cast(values.size()), + MPI_DOUBLE, + MPI_SUM, + comm); + } +#endif + return WfcExtrapStatus::Success; +} + +WfcExtrapStatus assemble_global_overlap(const double* overlap, + const Parallel_Orbitals& pv, + std::vector& overlap_global) +{ + const int nbasis_global = pv.get_global_row_size(); + if (overlap == nullptr || nbasis_global <= 0 || pv.get_global_col_size() != nbasis_global) + { + return WfcExtrapStatus::DimensionMismatch; + } + + overlap_global.assign(static_cast(nbasis_global) * static_cast(nbasis_global), 0.0); + + const int nrow_local = pv.get_row_size(); + const int ncol_local = pv.get_col_size(); + for (int icol = 0; icol < ncol_local; ++icol) + { + const int icol_global = pv.local2global_col(icol); + for (int irow = 0; irow < nrow_local; ++irow) + { + const int irow_global = pv.local2global_row(irow); + // ABACUS/ScaLAPACK local matrices are stored column-major: + // local(irow, icol) -> overlap[icol * nrow_local + irow]. + overlap_global[idx(irow_global, icol_global, nbasis_global)] + = overlap[static_cast(icol) * static_cast(nrow_local) + + static_cast(irow)]; + } + } + + return allreduce_dense_vector(overlap_global, pv); +} + +int local_band_to_global(const int ib_local, const Parallel_Orbitals& pv) +{ + return (ib_local / pv.nb * pv.dim1 + pv.coord[1]) * pv.nb + ib_local % pv.nb; +} + +int local_wfc_col_to_global(const int ib_local, const Parallel_Orbitals& pv, const bool coeff_columns_are_basis) +{ + // Gamma-only direct diagonalization solvers store Psi columns with the same + // distribution as the AO/basis columns, i.e. local Psi column -> global basis + // column. Other NAO paths may store only the requested band columns; in that + // case the band-column distribution follows the ScaLAPACK formula used by + // Parallel_Orbitals::set_nloc_wfc_Eij(). + return coeff_columns_are_basis ? pv.local2global_col(ib_local) : local_band_to_global(ib_local, pv); +} + +WfcExtrapStatus assemble_global_coeff_active(const double* coeff_local, + const Parallel_Orbitals& pv, + const int ncoeff_local, + const int nbasis_local, + const int nactive_bands, + const bool coeff_columns_are_basis, + std::vector& coeff_global) +{ + const int nbasis_global = pv.get_global_row_size(); + if (coeff_local == nullptr || nbasis_global <= 0 || ncoeff_local <= 0 || nbasis_local <= 0 + || nactive_bands < 0 || pv.get_row_size() != nbasis_local) + { + return WfcExtrapStatus::DimensionMismatch; + } + + coeff_global.assign(static_cast(nactive_bands) * static_cast(nbasis_global), 0.0); + + for (int ib_local = 0; ib_local < ncoeff_local; ++ib_local) + { + const int ib_global = local_wfc_col_to_global(ib_local, pv, coeff_columns_are_basis); + if (ib_global >= nactive_bands) + { + continue; + } + for (int mu_local = 0; mu_local < nbasis_local; ++mu_local) + { + const int mu_global = pv.local2global_row(mu_local); + coeff_global[idx(ib_global, mu_global, nbasis_global)] + = coeff_local[static_cast(ib_local) * static_cast(nbasis_local) + + static_cast(mu_local)]; + } + } + + return allreduce_dense_vector(coeff_global, pv); +} + +void scatter_global_coeff_active(const std::vector& coeff_global, + const Parallel_Orbitals& pv, + const int ncoeff_local, + const int nbasis_local, + const int nactive_bands, + const bool coeff_columns_are_basis, + double* coeff_local) +{ + const int nbasis_global = pv.get_global_row_size(); + for (int ib_local = 0; ib_local < ncoeff_local; ++ib_local) + { + const int ib_global = local_wfc_col_to_global(ib_local, pv, coeff_columns_are_basis); + if (ib_global >= nactive_bands) + { + continue; + } + for (int mu_local = 0; mu_local < nbasis_local; ++mu_local) + { + const int mu_global = pv.local2global_row(mu_local); + coeff_local[static_cast(ib_local) * static_cast(nbasis_local) + + static_cast(mu_local)] + = coeff_global[idx(ib_global, mu_global, nbasis_global)]; + } + } +} + } // namespace WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, + const Parallel_Orbitals& pv, psi::Psi& wfc, const ModuleBase::matrix& occupations, const double occupation_threshold, @@ -217,13 +357,21 @@ WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, } const int nstate = wfc.get_nk(); - const int nbands = wfc.get_nbands(); - const int nbasis = wfc.get_nbasis(); + const int ncoeff_local = wfc.get_nbands(); + const int nbasis_local = wfc.get_nbasis(); + const int nbasis_global = pv.get_global_row_size(); + const int ncoeff_global_basis = pv.get_global_col_size(); + const int ncoeff_global_bands = pv.get_wfc_global_nbands(); + const bool coeff_columns_are_basis = (ncoeff_local == pv.get_col_size()); + const bool coeff_columns_are_bands = (ncoeff_local == pv.ncol_bands); + const int ncoeff_global = coeff_columns_are_basis ? ncoeff_global_basis : ncoeff_global_bands; result.nstate = nstate; - result.nbands = nbands; - result.nbasis = nbasis; + result.nbands = ncoeff_global; + result.nbasis = nbasis_global; - if (nstate <= 0 || nbands <= 0 || nbasis <= 0 || nbands > nbasis) + if (nstate <= 0 || ncoeff_local <= 0 || nbasis_local <= 0 || nbasis_global <= 0 || ncoeff_global <= 0 + || ncoeff_global > nbasis_global || pv.get_row_size() != nbasis_local + || pv.get_global_col_size() != nbasis_global || (!coeff_columns_are_basis && !coeff_columns_are_bands)) { result.status = WfcExtrapStatus::DimensionMismatch; return result; @@ -241,6 +389,13 @@ WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, return result; } + std::vector overlap_global; + result.status = assemble_global_overlap(overlap, pv, overlap_global); + if (!result.ok()) + { + return result; + } + // Operate on a full owned copy first. The caller's Psi is overwritten only after // all state/spin channels pass the positive-definiteness and residual checks. std::vector trial(wfc.size(), 0.0); @@ -251,11 +406,11 @@ WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, int max_active_bands = 0; for (int istate = 0; istate < nstate; ++istate) { - double* coeff = trial.data() - + static_cast(istate) * static_cast(nbands) - * static_cast(nbasis); + double* coeff_local = trial.data() + + static_cast(istate) * static_cast(ncoeff_local) + * static_cast(nbasis_local); - const int nactive_bands = active_bands_for_state(occupations, istate, nbands, occupation_threshold); + const int nactive_bands = active_bands_for_state(occupations, istate, ncoeff_global, occupation_threshold); max_active_bands = std::max(max_active_bands, nactive_bands); result.nactive_bands = nactive_bands; @@ -266,8 +421,22 @@ WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, continue; } + std::vector coeff_global; + result.status = assemble_global_coeff_active(coeff_local, + pv, + ncoeff_local, + nbasis_local, + nactive_bands, + coeff_columns_are_basis, + coeff_global); + if (!result.ok()) + { + result.failed_state = istate; + return result; + } + std::vector metric; - build_overlap_in_band_space(overlap, coeff, nactive_bands, nbasis, metric); + build_overlap_in_band_space(overlap_global.data(), coeff_global.data(), nactive_bands, nbasis_global, metric); fill_metric_diagnostics(metric, nactive_bands, result); if (!cholesky_lower_in_place(metric, nactive_bands, pivot_threshold, result)) @@ -277,9 +446,12 @@ WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, return result; } - apply_inverse_cholesky_left(metric, coeff, nactive_bands, nbasis); + apply_inverse_cholesky_left(metric, coeff_global.data(), nactive_bands, nbasis_global); - const double max_dev = max_orthonormality_deviation(overlap, coeff, nactive_bands, nbasis); + const double max_dev = max_orthonormality_deviation(overlap_global.data(), + coeff_global.data(), + nactive_bands, + nbasis_global); if (!std::isfinite(max_dev) || max_dev > check_tolerance) { result.status = WfcExtrapStatus::OrthogonalizationFailed; @@ -288,6 +460,14 @@ WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, return result; } max_deviation_all = std::max(max_deviation_all, max_dev); + + scatter_global_coeff_active(coeff_global, + pv, + ncoeff_local, + nbasis_local, + nactive_bands, + coeff_columns_are_basis, + coeff_local); } wfc.set_all_psi(trial.data(), trial.size()); diff --git a/source/source_lcao/module_extrap/wf_orthonormalize_lcao.h b/source/source_lcao/module_extrap/wf_orthonormalize_lcao.h index ab3bf382292..bf02bce0609 100644 --- a/source/source_lcao/module_extrap/wf_orthonormalize_lcao.h +++ b/source/source_lcao/module_extrap/wf_orthonormalize_lcao.h @@ -2,6 +2,7 @@ #define SOURCE_LCAO_MODULE_EXTRAP_WF_ORTHONORMALIZE_LCAO_H #include "source_lcao/module_extrap/wf_extrap_method.h" +#include "source_basis/module_ao/parallel_orbitals.h" #include "source_base/matrix.h" #include "source_psi/psi.h" @@ -55,11 +56,15 @@ struct WfOrthonormalizeResult * contribute to DMK/DMR/rho and orthonormalizing them can make the metric nearly * singular when all basis-sized bands are present. * - * This is a dense serial utility for the first use_prev_wf implementation. The - * distributed/k-point path should be implemented separately instead of silently - * reusing this routine for block-cyclic matrices. + * The implementation is MPI-aware for the Gamma-only block-cyclic NAO layout. + * It assembles dense global work matrices from distributed local blocks, performs + * the small Cholesky transformation redundantly on all ranks, and scatters the + * transformed coefficients back to the local Psi layout. This favors correctness + * and a simple reviewable backend over peak performance; a future optimized path + * can replace the dense work arrays with fully distributed PBLAS operations. */ WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, + const Parallel_Orbitals& pv, psi::Psi& wfc, const ModuleBase::matrix& occupations, double occupation_threshold = 1.0e-12, From 4932345c23f0aa989f4f372fe814826c8446a556 Mon Sep 17 00:00:00 2001 From: Growl Date: Sat, 6 Jun 2026 16:06:31 +0800 Subject: [PATCH 08/10] Skip charge extrapolation when using WFN-based path --- source/source_esolver/esolver_fp.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/source/source_esolver/esolver_fp.cpp b/source/source_esolver/esolver_fp.cpp index 1ddb636c8ab..c3379320c0a 100644 --- a/source/source_esolver/esolver_fp.cpp +++ b/source/source_esolver/esolver_fp.cpp @@ -155,8 +155,20 @@ void ESolver_FP::before_scf(UnitCell& ucell, const int istep) if (ucell.ionic_position_updated) { this->CE.update_all_dis(ucell); - this->CE.extrapolate_charge(&this->Pgrid, ucell, &this->chr, &this->sf, - GlobalV::ofs_running, GlobalV::ofs_warning); + + const bool skip_charge_extrap_for_wfc = PARAM.inp.basis_type == "lcao" + && PARAM.inp.wfc_extrap != "none" + && istep > 0; + if (skip_charge_extrap_for_wfc) + { + GlobalV::ofs_running << " charge density extrapolation is skipped because wfc_extrap = " + << PARAM.inp.wfc_extrap << "." << std::endl; + } + else + { + this->CE.extrapolate_charge(&this->Pgrid, ucell, &this->chr, &this->sf, + GlobalV::ofs_running, GlobalV::ofs_warning); + } } //! calculate D2 or D3 vdW From 830f9567e69939e4decc78acf6f6bc250ec7b33b Mon Sep 17 00:00:00 2001 From: Growl Date: Sat, 6 Jun 2026 16:06:48 +0800 Subject: [PATCH 09/10] Add timer for WFN backend --- .../module_extrap/wf_history_lcao.cpp | 8 ++- .../module_extrap/wf_orthonormalize_lcao.cpp | 66 +++++++++++++------ 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/source/source_lcao/module_extrap/wf_history_lcao.cpp b/source/source_lcao/module_extrap/wf_history_lcao.cpp index dd0b87516c5..50bc306b2e2 100644 --- a/source/source_lcao/module_extrap/wf_history_lcao.cpp +++ b/source/source_lcao/module_extrap/wf_history_lcao.cpp @@ -1,6 +1,7 @@ #include "source_lcao/module_extrap/wf_history_lcao.h" #include "source_lcao/module_extrap/wf_orthonormalize_lcao.h" +#include "source_base/timer.h" #include #include @@ -142,12 +143,16 @@ WfExtrapApplyResult WfHistoryLCAO::try_use_prev_wf_gamma(const double* curre // Work on an owned trial Psi first. The caller's Psi is not overwritten // unless the stored WFN can be loaded and reorthonormalized successfully. psi::Psi psi_trial(psi); - if (!snapshot.load_to(psi_trial)) + ModuleBase::timer::start("WFN_Extrap", "restore_snapshot"); + const bool snapshot_loaded = snapshot.load_to(psi_trial); + ModuleBase::timer::end("WFN_Extrap", "restore_snapshot"); + if (!snapshot_loaded) { result.status = WfcExtrapStatus::DimensionMismatch; return result; } + ModuleBase::timer::start("WFN_Extrap", "orthonormalize"); const WfOrthonormalizeResult orth_result = reorthonormalize_gamma_lcao(current_overlap, pv, psi_trial, @@ -155,6 +160,7 @@ WfExtrapApplyResult WfHistoryLCAO::try_use_prev_wf_gamma(const double* curre 1.0e-12, pivot_threshold, check_tolerance); + ModuleBase::timer::end("WFN_Extrap", "orthonormalize"); if (!orth_result.ok()) { result.status = orth_result.status; diff --git a/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp b/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp index 2882351bcaf..e1dc455e5e4 100644 --- a/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp +++ b/source/source_lcao/module_extrap/wf_orthonormalize_lcao.cpp @@ -1,4 +1,5 @@ #include "source_lcao/module_extrap/wf_orthonormalize_lcao.h" +#include "source_base/timer.h" #include #include @@ -14,6 +15,24 @@ namespace ModuleExtrap namespace { +class TimerGuard +{ + public: + TimerGuard(const char* group, const char* name) : group_(group), name_(name) + { + ModuleBase::timer::start(group_, name_); + } + + ~TimerGuard() + { + ModuleBase::timer::end(group_, name_); + } + + private: + const char* group_; + const char* name_; +}; + inline std::size_t idx(const int row, const int col, const int ncol) noexcept { return static_cast(row) * static_cast(ncol) + static_cast(col); @@ -234,6 +253,7 @@ WfcExtrapStatus assemble_global_overlap(const double* overlap, const Parallel_Orbitals& pv, std::vector& overlap_global) { + TimerGuard timer("WFN_Extrap", "assemble_overlap"); const int nbasis_global = pv.get_global_row_size(); if (overlap == nullptr || nbasis_global <= 0 || pv.get_global_col_size() != nbasis_global) { @@ -284,6 +304,7 @@ WfcExtrapStatus assemble_global_coeff_active(const double* coeff_local, const bool coeff_columns_are_basis, std::vector& coeff_global) { + TimerGuard timer("WFN_Extrap", "assemble_coeff"); const int nbasis_global = pv.get_global_row_size(); if (coeff_local == nullptr || nbasis_global <= 0 || ncoeff_local <= 0 || nbasis_local <= 0 || nactive_bands < 0 || pv.get_row_size() != nbasis_local) @@ -320,6 +341,7 @@ void scatter_global_coeff_active(const std::vector& coeff_global, const bool coeff_columns_are_basis, double* coeff_local) { + TimerGuard timer("WFN_Extrap", "scatter_coeff"); const int nbasis_global = pv.get_global_row_size(); for (int ib_local = 0; ib_local < ncoeff_local; ++ib_local) { @@ -435,29 +457,33 @@ WfOrthonormalizeResult reorthonormalize_gamma_lcao(const double* overlap, return result; } - std::vector metric; - build_overlap_in_band_space(overlap_global.data(), coeff_global.data(), nactive_bands, nbasis_global, metric); - fill_metric_diagnostics(metric, nactive_bands, result); - - if (!cholesky_lower_in_place(metric, nactive_bands, pivot_threshold, result)) + double max_dev = 0.0; { - result.status = WfcExtrapStatus::OrthogonalizationFailed; - result.failed_state = istate; - return result; - } + TimerGuard timer("WFN_Extrap", "cholesky_transform"); + std::vector metric; + build_overlap_in_band_space(overlap_global.data(), coeff_global.data(), nactive_bands, nbasis_global, metric); + fill_metric_diagnostics(metric, nactive_bands, result); - apply_inverse_cholesky_left(metric, coeff_global.data(), nactive_bands, nbasis_global); + if (!cholesky_lower_in_place(metric, nactive_bands, pivot_threshold, result)) + { + result.status = WfcExtrapStatus::OrthogonalizationFailed; + result.failed_state = istate; + return result; + } - const double max_dev = max_orthonormality_deviation(overlap_global.data(), - coeff_global.data(), - nactive_bands, - nbasis_global); - if (!std::isfinite(max_dev) || max_dev > check_tolerance) - { - result.status = WfcExtrapStatus::OrthogonalizationFailed; - result.failed_state = istate; - result.max_deviation = max_dev; - return result; + apply_inverse_cholesky_left(metric, coeff_global.data(), nactive_bands, nbasis_global); + + max_dev = max_orthonormality_deviation(overlap_global.data(), + coeff_global.data(), + nactive_bands, + nbasis_global); + if (!std::isfinite(max_dev) || max_dev > check_tolerance) + { + result.status = WfcExtrapStatus::OrthogonalizationFailed; + result.failed_state = istate; + result.max_deviation = max_dev; + return result; + } } max_deviation_all = std::max(max_deviation_all, max_dev); From 88fee09fc1dc9050e2c676ea9071321c48f147ba Mon Sep 17 00:00:00 2001 From: Growl Date: Sat, 6 Jun 2026 17:34:50 +0800 Subject: [PATCH 10/10] module_extrap/CMakeLists.txt: set objects rather than apeend --- source/source_lcao/module_extrap/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/source_lcao/module_extrap/CMakeLists.txt b/source/source_lcao/module_extrap/CMakeLists.txt index fcf83f56ff4..1b8c10f8f0d 100644 --- a/source/source_lcao/module_extrap/CMakeLists.txt +++ b/source/source_lcao/module_extrap/CMakeLists.txt @@ -1,4 +1,4 @@ -list(APPEND objects +set(objects wf_history_lcao.cpp wf_orthonormalize_lcao.cpp )