Fix buffer overflow in H_Ewald_pw::rgen for very small unit cells / high ecutwfc (#7025)

Copilot · Cstandardlib · web-flow · commit 7962634012b8 · 2026-03-14T12:15:54.000+08:00
* Initial plan

* fix: dynamically compute mxr in H_Ewald_pw to prevent buffer overflow for small unit cells

Co-authored-by: Cstandardlib <49788094+Cstandardlib@users.noreply.github.com>

* test: add unit tests for H_Ewald_pw::rgen including large-rmax buffer-overflow regression

Co-authored-by: Cstandardlib <49788094+Cstandardlib@users.noreply.github.com>

* test: clarify misleading comment in SimpleCubicNonZeroDtau test

Co-authored-by: Cstandardlib <49788094+Cstandardlib@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: Cstandardlib <49788094+Cstandardlib@users.noreply.github.com>
diff --git a/source/source_hamilt/module_ewald/H_Ewald_pw.cpp b/source/source_hamilt/module_ewald/H_Ewald_pw.cpp
@@ -50,19 +50,7 @@ double H_Ewald_pw::compute_ewald(const UnitCell& cell,
     // buffer variable
     // used to optimize alpha
 
-	if(PARAM.inp.test_energy) 
-    {
-        ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"mxr",mxr);
-    }
-    //r  = new ModuleBase::Vector3<double>[mxr];
-    //r2 = new double[mxr];
-    //int* irr = new int[mxr];
-    std::vector<ModuleBase::Vector3<double>> vec_r(mxr);
-    std::vector<double> vec_r2(mxr);
-    std::vector<int> vec_irr(mxr);
-    int* irr = vec_irr.data();
-    ModuleBase::Vector3<double>* r = vec_r.data();
-    double* r2 = vec_r2.data();
+    // (arrays are allocated below, after rmax and mxr are determined)
 
     // (1) calculate total ionic charge
     double charge = 0.0;
@@ -158,8 +146,33 @@ double H_Ewald_pw::compute_ewald(const UnitCell& cell,
 
     // R-space sum here (only done for the processor that contains G=0)
     ewaldr = 0.0;
-#ifdef __MPI
+
+    // Compute rmax and dynamically determine mxr (maximum number of r-vectors)
+    // to avoid buffer overflow for very small unit cells or high cutoff energies.
     rmax = 4.0 / sqrt(alpha) / cell.lat0;
+    {
+        double bg1[3];
+        bg1[0] = cell.G.e11; bg1[1] = cell.G.e12; bg1[2] = cell.G.e13;
+        int nm1 = (int)(dnrm2(3, bg1, 1) * rmax + 2);
+        bg1[0] = cell.G.e21; bg1[1] = cell.G.e22; bg1[2] = cell.G.e23;
+        int nm2 = (int)(dnrm2(3, bg1, 1) * rmax + 2);
+        bg1[0] = cell.G.e31; bg1[1] = cell.G.e32; bg1[2] = cell.G.e33;
+        int nm3 = (int)(dnrm2(3, bg1, 1) * rmax + 2);
+        mxr = (2 * nm1 + 1) * (2 * nm2 + 1) * (2 * nm3 + 1);
+    }
+
+    if(PARAM.inp.test_energy) 
+    {
+        ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"mxr",mxr);
+    }
+    std::vector<ModuleBase::Vector3<double>> vec_r(mxr);
+    std::vector<double> vec_r2(mxr);
+    std::vector<int> vec_irr(mxr);
+    int* irr = vec_irr.data();
+    ModuleBase::Vector3<double>* r = vec_r.data();
+    double* r2 = vec_r2.data();
+
+#ifdef __MPI
     if(PARAM.inp.test_energy) 
     {
         ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"rmax(unit lat0)",rmax);
@@ -220,8 +233,7 @@ double H_Ewald_pw::compute_ewald(const UnitCell& cell,
 #else
     if (rho_basis->ig_gge0 >= 0)
     {	
-        rmax = 4.0 / sqrt(alpha) / cell.lat0;
-		if(PARAM.inp.test_energy) ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"rmax(unit lat0)",rmax);
+        if(PARAM.inp.test_energy) ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"rmax(unit lat0)",rmax);
         // with this choice terms up to ZiZj*erfc(4) are counted (erfc(4)=2x10^-8
         int nt1=0;
         int nt2=0;
@@ -385,9 +397,10 @@ void H_Ewald_pw::rgen(
 
                 if (tt <= rmax * rmax && std::abs(tt) > 1.e-10)
                 {
-                    if (nrm > mxr)
+                    if (nrm >= mxr)
                     {
-                        std::cerr << "\n rgen, too many r-vectors," << nrm;
+                        ModuleBase::WARNING_QUIT("rgen", "too many r-vectors (nrm=" + std::to_string(nrm)
+                                                 + ", mxr=" + std::to_string(mxr) + "). Please report this issue.");
                     }
                     r[nrm] = t;
                     r2[nrm] = tt;
diff --git a/source/source_hamilt/test/CMakeLists.txt b/source/source_hamilt/test/CMakeLists.txt
@@ -2,3 +2,9 @@ AddTest(
   TARGET MODULE_HAMILT_ewald_dnrm2
   SOURCES dnrm2_test.cpp  ../module_ewald/dnrm2.cpp
 )
+
+AddTest(
+  TARGET MODULE_HAMILT_ewald_rgen
+  LIBS parameter ${math_libs} base device
+  SOURCES rgen_test.cpp ../module_ewald/H_Ewald_pw.cpp ../module_ewald/dnrm2.cpp
+)
diff --git a/source/source_hamilt/test/rgen_test.cpp b/source/source_hamilt/test/rgen_test.cpp
@@ -0,0 +1,154 @@
+#include "gtest/gtest.h"
+#include "gmock/gmock.h"
+#include "../module_ewald/H_Ewald_pw.h"
+#include "../module_ewald/dnrm2.h"
+#include "source_base/matrix3.h"
+#include <vector>
+
+/************************************************
+ *  unit test of H_Ewald_pw::rgen
+ ***********************************************/
+
+/**
+ * - Tested Functions:
+ *   - H_Ewald_pw::rgen():
+ *      - Generates the vectors t = R - dtau (R a lattice vector) with
+ *        0 < |t| <= rmax, and returns them sorted by ascending magnitude.
+ *      - Tested cases:
+ *        1. rmax == 0.0: no vectors returned.
+ *        2. Simple cubic cell, small rmax: correct count + sorted order.
+ *        3. Large rmax exceeding original fixed mxr=200 limit: verifies
+ *           that the dynamic mxr sizing introduced in the bug fix works
+ *           correctly and does not overflow the allocated arrays.
+ */
+
+class RgenTest : public ::testing::Test
+{
+  protected:
+    // Simple cubic unit cell: latvec = G = identity
+    ModuleBase::Matrix3 latvec;
+    ModuleBase::Matrix3 G;
+    ModuleBase::Vector3<double> dtau;
+
+    void SetUp() override
+    {
+        // Unit cubic cell: direct and reciprocal lattice vectors are identity
+        latvec = ModuleBase::Matrix3(1, 0, 0, 0, 1, 0, 0, 0, 1);
+        G = ModuleBase::Matrix3(1, 0, 0, 0, 1, 0, 0, 0, 1);
+        dtau = ModuleBase::Vector3<double>(0.0, 0.0, 0.0);
+    }
+};
+
+TEST_F(RgenTest, ZeroRmax)
+{
+    // When rmax==0 the function should return immediately with nrm=0
+    const int mxr_test = 10;
+    H_Ewald_pw::mxr = mxr_test;
+    std::vector<ModuleBase::Vector3<double>> r(mxr_test);
+    std::vector<double> r2(mxr_test);
+    std::vector<int> irr(mxr_test);
+    int nrm = 0;
+
+    H_Ewald_pw::rgen(dtau, 0.0, irr.data(), latvec, G, r.data(), r2.data(), nrm);
+
+    EXPECT_EQ(nrm, 0);
+}
+
+TEST_F(RgenTest, SimpleCubicNearestNeighbors)
+{
+    // rmax = 1.5 captures nearest (d=1) and next-nearest (d=sqrt(2)~1.414)
+    // neighbors: 6 + 12 = 18 vectors total.
+    const double rmax = 1.5;
+    const int mxr_test = 50;
+    H_Ewald_pw::mxr = mxr_test;
+    std::vector<ModuleBase::Vector3<double>> r(mxr_test);
+    std::vector<double> r2(mxr_test);
+    std::vector<int> irr(mxr_test);
+    int nrm = 0;
+
+    H_Ewald_pw::rgen(dtau, rmax, irr.data(), latvec, G, r.data(), r2.data(), nrm);
+
+    EXPECT_EQ(nrm, 18);
+
+    // Vectors must be sorted in ascending order of |r|^2
+    for (int i = 1; i < nrm; ++i)
+    {
+        EXPECT_LE(r2[i - 1], r2[i]);
+    }
+
+    // All returned vectors must be non-zero and lie inside or on the sphere
+    for (int i = 0; i < nrm; ++i)
+    {
+        EXPECT_LE(r2[i], rmax * rmax + 1.0e-10);
+        EXPECT_GT(r2[i], 1.0e-10);
+    }
+}
+
+TEST_F(RgenTest, SimpleCubicNonZeroDtau)
+{
+    // rgen computes t = R - dtau for each lattice vector R=(i,j,k)*latvec,
+    // and excludes vectors with |t|^2 < 1e-10 (i.e. R == dtau).
+    // With dtau=(0.5,0,0) and rmax=0.6, two vectors qualify:
+    //   R=(0,0,0): t = (0,0,0)-(0.5,0,0) = (-0.5,0,0), |t|^2=0.25 <= 0.36
+    //   R=(1,0,0): t = (1,0,0)-(0.5,0,0) = ( 0.5,0,0), |t|^2=0.25 <= 0.36
+    // No lattice point coincides with dtau, so neither is excluded.
+    const double rmax = 0.6;
+    const int mxr_test = 10;
+    H_Ewald_pw::mxr = mxr_test;
+    dtau = ModuleBase::Vector3<double>(0.5, 0.0, 0.0);
+    std::vector<ModuleBase::Vector3<double>> r(mxr_test);
+    std::vector<double> r2(mxr_test);
+    std::vector<int> irr(mxr_test);
+    int nrm = 0;
+
+    H_Ewald_pw::rgen(dtau, rmax, irr.data(), latvec, G, r.data(), r2.data(), nrm);
+
+    EXPECT_EQ(nrm, 2);
+    for (int i = 0; i < nrm; ++i)
+    {
+        EXPECT_NEAR(r2[i], 0.25, 1.0e-10);
+    }
+}
+
+TEST_F(RgenTest, LargeRmaxExceedsOriginalLimit)
+{
+    // rmax=4.0 on a unit cubic cell yields 256 r-vectors (the 257 lattice points
+    // with |R|^2 <= 16, minus the origin), above the old fixed limit of mxr=200.
+    // This test verifies that with a properly sized mxr the function
+    // completes without error.
+    const double rmax = 4.0;
+
+    // Replicate the dynamic mxr computation from compute_ewald()
+    double bg1[3];
+    bg1[0] = G.e11; bg1[1] = G.e12; bg1[2] = G.e13;
+    int nm1 = (int)(dnrm2(3, bg1, 1) * rmax + 2);
+    bg1[0] = G.e21; bg1[1] = G.e22; bg1[2] = G.e23;
+    int nm2 = (int)(dnrm2(3, bg1, 1) * rmax + 2);
+    bg1[0] = G.e31; bg1[1] = G.e32; bg1[2] = G.e33;
+    int nm3 = (int)(dnrm2(3, bg1, 1) * rmax + 2);
+    const int mxr_test = (2 * nm1 + 1) * (2 * nm2 + 1) * (2 * nm3 + 1);
+
+    H_Ewald_pw::mxr = mxr_test;
+    std::vector<ModuleBase::Vector3<double>> r(mxr_test);
+    std::vector<double> r2(mxr_test);
+    std::vector<int> irr(mxr_test);
+    int nrm = 0;
+
+    H_Ewald_pw::rgen(dtau, rmax, irr.data(), latvec, G, r.data(), r2.data(), nrm);
+
+    // Must exceed the old hard-coded limit that caused the crash
+    EXPECT_GT(nrm, 200);
+
+    // All returned vectors lie within the sphere
+    for (int i = 0; i < nrm; ++i)
+    {
+        EXPECT_LE(r2[i], rmax * rmax + 1.0e-10);
+        EXPECT_GT(r2[i], 1.0e-10);
+    }
+
+    // Vectors are sorted in ascending order of |r|^2
+    for (int i = 1; i < nrm; ++i)
+    {
+        EXPECT_LE(r2[i - 1], r2[i]);
+    }
+}