Fix mxr, use estimate for every reference (#7036)

Cstandardlib · web-flow · commit a0bcf722b2d1 · 2026-03-18T08:24:54.000+08:00
diff --git a/source/source_hamilt/module_ewald/H_Ewald_pw.cpp b/source/source_hamilt/module_ewald/H_Ewald_pw.cpp
@@ -12,6 +12,18 @@ int H_Ewald_pw::mxr = 200;
 H_Ewald_pw::H_Ewald_pw(){};
 H_Ewald_pw::~H_Ewald_pw(){};
 
+int H_Ewald_pw::estimate_mxr(const double &rmax, const ModuleBase::Matrix3 &bg)
+{ // Returns the length (mxr) callers use to size the r, r2 and irr buffers they hand to rgen, given rmax and the matrix bg.
+    double bg1[3]; // scratch copy of one row of bg at a time, laid out contiguously for dnrm2
+    bg1[0] = bg.e11; bg1[1] = bg.e12; bg1[2] = bg.e13;
+    const int nm1 = (int)(dnrm2(3, bg1, 1) * rmax + 2); // dnrm2 of row 1 (presumably the BLAS Euclidean norm - confirm) times rmax, plus 2, truncated to int
+    bg1[0] = bg.e21; bg1[1] = bg.e22; bg1[2] = bg.e23;
+    const int nm2 = (int)(dnrm2(3, bg1, 1) * rmax + 2); // same bound for row 2 (e21, e22, e23)
+    bg1[0] = bg.e31; bg1[1] = bg.e32; bg1[2] = bg.e33;
+    const int nm3 = (int)(dnrm2(3, bg1, 1) * rmax + 2); // same bound for row 3 (e31, e32, e33)
+    return (2 * nm1 + 1) * (2 * nm2 + 1) * (2 * nm3 + 1); // count of integer triples (i, j, k) with |i| <= nm1, |j| <= nm2, |k| <= nm3; NOTE(review): plain int product - confirm rmax stays small enough not to overflow
+}
+
 double H_Ewald_pw::compute_ewald(const UnitCell& cell,
                                  const ModulePW::PW_Basis* rho_basis,
                                  const ModuleBase::ComplexMatrix& strucFac)
@@ -150,16 +162,7 @@ double H_Ewald_pw::compute_ewald(const UnitCell& cell,
     // Compute rmax and dynamically determine mxr (maximum number of r-vectors)
     // to avoid buffer overflow for very small unit cells or high cutoff energies.
     rmax = 4.0 / sqrt(alpha) / cell.lat0;
-    {
-        double bg1[3];
-        bg1[0] = cell.G.e11; bg1[1] = cell.G.e12; bg1[2] = cell.G.e13;
-        int nm1 = (int)(dnrm2(3, bg1, 1) * rmax + 2);
-        bg1[0] = cell.G.e21; bg1[1] = cell.G.e22; bg1[2] = cell.G.e23;
-        int nm2 = (int)(dnrm2(3, bg1, 1) * rmax + 2);
-        bg1[0] = cell.G.e31; bg1[1] = cell.G.e32; bg1[2] = cell.G.e33;
-        int nm3 = (int)(dnrm2(3, bg1, 1) * rmax + 2);
-        mxr = (2 * nm1 + 1) * (2 * nm2 + 1) * (2 * nm3 + 1);
-    }
+    mxr = H_Ewald_pw::estimate_mxr(rmax, cell.G);
 
     if(PARAM.inp.test_energy) 
     {
@@ -205,7 +208,7 @@ double H_Ewald_pw::compute_ewald(const UnitCell& cell,
             // calculate tau[na1]-tau[na2]
             dtau = cell.atoms[it1].tau[ia1] - cell.atoms[it2].tau[ia2];
             // generates nearest-neighbors shells
-            H_Ewald_pw::rgen(dtau, rmax, irr, cell.latvec, cell.G, r, r2, nrm);
+            H_Ewald_pw::rgen(dtau, rmax, irr, cell.latvec, cell.G, r, r2, mxr, nrm);
             // at-->cell.latvec, bg-->G
             // and sum to the real space part
 
@@ -249,7 +252,7 @@ double H_Ewald_pw::compute_ewald(const UnitCell& cell,
                         //calculate tau[na]-tau[nb]
                         dtau = cell.atoms[nt1].tau[na] - cell.atoms[nt2].tau[nb];
                         //generates nearest-neighbors shells
-                        H_Ewald_pw::rgen(dtau, rmax, irr, cell.latvec, cell.G, r, r2, nrm);
+                        H_Ewald_pw::rgen(dtau, rmax, irr, cell.latvec, cell.G, r, r2, mxr, nrm);
                         // at-->cell.latvec, bg-->G
                         // and sum to the real space part
 
@@ -301,6 +304,7 @@ void H_Ewald_pw::rgen(
     const ModuleBase::Matrix3 &G,
     ModuleBase::Vector3<double> *r,
     double *r2,
+    const int mxr,
     int &nrm)
 {
     //-------------------------------------------------------------------
diff --git a/source/source_hamilt/module_ewald/H_Ewald_pw.h b/source/source_hamilt/module_ewald/H_Ewald_pw.h
@@ -20,6 +20,8 @@ class H_Ewald_pw
                                 const ModuleBase::ComplexMatrix& strucFac);
 
   public:
+    static int estimate_mxr(const double &rmax, const ModuleBase::Matrix3 &bg);
+
     static void rgen(
         const ModuleBase::Vector3<double> &dtau,
         const double &rmax,
@@ -28,6 +30,7 @@ class H_Ewald_pw
         const ModuleBase::Matrix3 &bg,
         ModuleBase::Vector3<double> *r,
         double *r2,
+      const int mxr,
         int  &nrm
     );
 
diff --git a/source/source_hamilt/test/rgen_test.cpp b/source/source_hamilt/test/rgen_test.cpp
@@ -43,13 +43,12 @@ TEST_F(RgenTest, ZeroRmax)
 {
     // When rmax==0 the function should return immediately with nrm=0
     const int mxr_test = 10;
-    H_Ewald_pw::mxr = mxr_test;
     std::vector<ModuleBase::Vector3<double>> r(mxr_test);
     std::vector<double> r2(mxr_test);
     std::vector<int> irr(mxr_test);
     int nrm = 0;
 
-    H_Ewald_pw::rgen(dtau, 0.0, irr.data(), latvec, G, r.data(), r2.data(), nrm);
+    H_Ewald_pw::rgen(dtau, 0.0, irr.data(), latvec, G, r.data(), r2.data(), mxr_test, nrm);
 
     EXPECT_EQ(nrm, 0);
 }
@@ -60,13 +59,12 @@ TEST_F(RgenTest, SimpleCubicNearestNeighbors)
     // neighbors: 6 + 12 = 18 vectors total.
     const double rmax = 1.5;
     const int mxr_test = 50;
-    H_Ewald_pw::mxr = mxr_test;
     std::vector<ModuleBase::Vector3<double>> r(mxr_test);
     std::vector<double> r2(mxr_test);
     std::vector<int> irr(mxr_test);
     int nrm = 0;
 
-    H_Ewald_pw::rgen(dtau, rmax, irr.data(), latvec, G, r.data(), r2.data(), nrm);
+    H_Ewald_pw::rgen(dtau, rmax, irr.data(), latvec, G, r.data(), r2.data(), mxr_test, nrm);
 
     EXPECT_EQ(nrm, 18);
 
@@ -94,14 +92,13 @@ TEST_F(RgenTest, SimpleCubicNonZeroDtau)
     // No lattice point coincides with dtau, so neither is excluded.
     const double rmax = 0.6;
     const int mxr_test = 10;
-    H_Ewald_pw::mxr = mxr_test;
     dtau = ModuleBase::Vector3<double>(0.5, 0.0, 0.0);
     std::vector<ModuleBase::Vector3<double>> r(mxr_test);
     std::vector<double> r2(mxr_test);
     std::vector<int> irr(mxr_test);
     int nrm = 0;
 
-    H_Ewald_pw::rgen(dtau, rmax, irr.data(), latvec, G, r.data(), r2.data(), nrm);
+    H_Ewald_pw::rgen(dtau, rmax, irr.data(), latvec, G, r.data(), r2.data(), mxr_test, nrm);
 
     EXPECT_EQ(nrm, 2);
     for (int i = 0; i < nrm; ++i)
@@ -128,13 +125,12 @@ TEST_F(RgenTest, LargeRmaxExceedsOriginalLimit)
     int nm3 = (int)(dnrm2(3, bg1, 1) * rmax + 2);
     const int mxr_test = (2 * nm1 + 1) * (2 * nm2 + 1) * (2 * nm3 + 1);
 
-    H_Ewald_pw::mxr = mxr_test;
     std::vector<ModuleBase::Vector3<double>> r(mxr_test);
     std::vector<double> r2(mxr_test);
     std::vector<int> irr(mxr_test);
     int nrm = 0;
 
-    H_Ewald_pw::rgen(dtau, rmax, irr.data(), latvec, G, r.data(), r2.data(), nrm);
+    H_Ewald_pw::rgen(dtau, rmax, irr.data(), latvec, G, r.data(), r2.data(), mxr_test, nrm);
 
     // Must exceed the old hard-coded limit that caused the crash
     EXPECT_GT(nrm, 200);
diff --git a/source/source_pw/module_pwdft/forces.cpp b/source/source_pw/module_pwdft/forces.cpp
@@ -657,7 +657,7 @@ void Forces<FPTYPE, Device>::cal_force_ew(const UnitCell& ucell,
             int nrm = 0;
 
             // output of rgen: the number of vectors in the sphere
-            const int mxr = 200;
+            const int mxr = H_Ewald_pw::estimate_mxr(rmax, ucell.G);
             // the maximum number of R vectors included in r
             std::vector<ModuleBase::Vector3<double>> r(mxr);
             std::vector<double> r2(mxr);
@@ -681,7 +681,7 @@ void Forces<FPTYPE, Device>::cal_force_ew(const UnitCell& ucell,
                     {
                         ModuleBase::Vector3<double> d_tau
                             = ucell.atoms[T1].tau[I1] - ucell.atoms[T2].tau[I2];
-                        H_Ewald_pw::rgen(d_tau, rmax, irr.data(), ucell.latvec, ucell.G, r.data(), r2.data(), nrm);
+                        H_Ewald_pw::rgen(d_tau, rmax, irr.data(), ucell.latvec, ucell.G, r.data(), r2.data(), mxr, nrm);
 
                         for (int n = 0; n < nrm; n++)
                         {
diff --git a/source/source_pw/module_pwdft/stress_ewa.cpp b/source/source_pw/module_pwdft/stress_ewa.cpp
@@ -108,7 +108,6 @@ void Stress_Func<FPTYPE, Device>::stress_ewa(const UnitCell& ucell,
 	}
 
     //R-space sum here (only for the processor that contains G=0) 
-    int mxr = 200;
     int *irr=nullptr;
     ModuleBase::Vector3<FPTYPE> *r;
     FPTYPE *r2=nullptr;
@@ -121,13 +120,14 @@ void Stress_Func<FPTYPE, Device>::stress_ewa(const UnitCell& ucell,
 
 	if(ig0 >= 0)
 	{
-		std::vector<ModuleBase::Vector3<FPTYPE>> r(mxr);
-		std::vector<FPTYPE> r2(mxr);
-		std::vector<int> irr(mxr);
-
 		FPTYPE sqa = sqrt(alpha);
 		FPTYPE sq8a_2pi = sqrt(8 * alpha / (ModuleBase::TWO_PI));
 		rmax = 4.0/sqa/ucell.lat0;
+		const int mxr = H_Ewald_pw::estimate_mxr(rmax, ucell.G);
+
+		std::vector<ModuleBase::Vector3<FPTYPE>> r(mxr);
+		std::vector<FPTYPE> r2(mxr);
+		std::vector<int> irr(mxr);
 
 		#pragma omp for
 		for(long long ijat = 0; ijat < ucell.nat * ucell.nat; ijat++)
@@ -142,7 +142,7 @@ void Stress_Func<FPTYPE, Device>::stress_ewa(const UnitCell& ucell,
 				//calculate tau[na]-tau[nb]
 				d_tau = ucell.atoms[it].tau[i] - ucell.atoms[jt].tau[j];
 				//generates nearest-neighbors shells 
-				H_Ewald_pw::rgen(d_tau, rmax, irr.data(), ucell.latvec, ucell.G, r.data(), r2.data(), nrm);
+				H_Ewald_pw::rgen(d_tau, rmax, irr.data(), ucell.latvec, ucell.G, r.data(), r2.data(), mxr, nrm);
 				for(int nr=0; nr<nrm; nr++)
 				{
 					rr=sqrt(r2[nr]) * ucell.lat0;