AMReX-Codes
diff --git a/‎.github/workflows/cuda.yml‎
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/cuda.yml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.github/workflows/dependencies/dependencies_cmake.sh‎
Lines changed: 31 additions & 0 deletions b/‎.github/workflows/dependencies/dependencies_cmake.sh‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎.github/workflows/dependencies/dependencies_nvcc.sh‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/dependencies/dependencies_nvcc.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/dependencies/dependencies_nvcc_2404.sh‎
Lines changed: 44 additions & 0 deletions b/‎.github/workflows/dependencies/dependencies_nvcc_2404.sh‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎CHANGES‎
Lines changed: 60 additions & 0 deletions b/‎CHANGES‎
Lines changed: 60 additions & 0 deletions
diff --git a/‎Docs/sphinx_documentation/source/Visualization.rst‎
Lines changed: 4 additions & 0 deletions b/‎Docs/sphinx_documentation/source/Visualization.rst‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎Src/AmrCore/AMReX_Interp_C.H‎
Lines changed: 54 additions & 0 deletions b/‎Src/AmrCore/AMReX_Interp_C.H‎
Lines changed: 54 additions & 0 deletions
diff --git a/‎Src/AmrCore/AMReX_Interpolater.cpp‎
Lines changed: 57 additions & 18 deletions b/‎Src/AmrCore/AMReX_Interpolater.cpp‎
Lines changed: 57 additions & 18 deletions
diff --git a/‎Src/Base/AMReX.cpp‎
Lines changed: 13 additions & 0 deletions b/‎Src/Base/AMReX.cpp‎
Lines changed: 13 additions & 0 deletions
@@ -185,15 +185,15 @@ jobs:
 
   # Build 3D libamrex cuda build with configure
   configure-3d-cuda:
-    name: CUDA@12.6 [configure 3D]
+    name: CUDA@13.0 [configure 3D]
     runs-on: ubuntu-24.04
     needs: check_changes
     if: needs.check_changes.outputs.has_non_docs_changes == 'true'
     steps:
     - uses: actions/checkout@v5
     - name: Dependencies
       run: |
-        .github/workflows/dependencies/dependencies_nvcc.sh 12.6
+        .github/workflows/dependencies/dependencies_nvcc_2404.sh 13.0
         .github/workflows/dependencies/dependencies_ccache.sh
     - name: Set Up Cache
       uses: actions/cache@v4
@@ -215,7 +215,7 @@ jobs:
         # /home/runner/work/amrex/amrex/Src/Base/AMReX_GpuLaunchGlobal.H:16:41: error: unused parameter ‘f0’ [-Werror=unused-parameter]
         #    16 |     AMREX_GPU_GLOBAL void launch_global (L f0) { f0(); }
         #
-        make -j4 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS="-fno-operator-names -Wno-unused-parameter" CCACHE=ccache CUDA_ARCH="7.0 7.2"
+        make -j4 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS="-fno-operator-names -Wno-unused-parameter" CCACHE=ccache CUDA_ARCH="8.0 9.0"
         make install
 
         ccache -s
 
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+set -eu -o pipefail
+
+# `man apt.conf`:
+#   Number of retries to perform. If this is non-zero APT will retry
+#   failed files the given number of times.
+echo 'Acquire::Retries "3";' | sudo tee /etc/apt/apt.conf.d/80-retries
+
+test -f /usr/share/doc/kitware-archive-keyring/copyright ||
+wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
+
+if [[ ! -f /etc/apt/trusted.gpg.d/apt.llvm.org.asc ]]; then
+    wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc
+fi
+
+source /etc/os-release # set UBUNTU_CODENAME
+
+echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ ${UBUNTU_CODENAME} main" | sudo tee /etc/apt/sources.list.d/kitware.list >/dev/null
+
+sudo apt-get update
+
+test -f /usr/share/doc/kitware-archive-keyring/copyright ||
+sudo rm /usr/share/keyrings/kitware-archive-keyring.gpg
+
+sudo apt-get install kitware-archive-keyring
+
+sudo apt-get install -y --no-install-recommends cmake
+
+sudo rm -f /usr/local/bin/cmake
+cmake --version
@@ -25,7 +25,7 @@ sudo apt-get install -y \
     wget
 
 VERSION_DOTTED=${1-12.0} && VERSION_DASHED=$(sed 's/\./-/' <<< $VERSION_DOTTED)
-curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb
+curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb
 sudo dpkg -i cuda-keyring_1.0-1_all.deb
 sudo apt-get update
 sudo apt-get install -y \
 
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+#
+# Copyright 2020-2022 Axel Huebl
+#
+# License: BSD-3-Clause-LBNL
+
+set -eu -o pipefail
+
+# `man apt.conf`:
+#   Number of retries to perform. If this is non-zero APT will retry
+#   failed files the given number of times.
+echo 'Acquire::Retries "3";' | sudo tee /etc/apt/apt.conf.d/80-retries
+
+sudo apt-get -qqq update
+sudo apt-get install -y \
+    build-essential     \
+    ca-certificates     \
+    cmake               \
+    g++                 \
+    gfortran            \
+    gnupg               \
+    libopenmpi-dev      \
+    openmpi-bin         \
+    pkg-config          \
+    wget
+
+VERSION_DOTTED=${1-12.0} && VERSION_DASHED=$(sed 's/\./-/' <<< $VERSION_DOTTED)
+curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
+sudo dpkg -i cuda-keyring_1.1-1_all.deb
+sudo apt-get update
+sudo apt-get install -y \
+    cuda-command-line-tools-$VERSION_DASHED \
+    cuda-compiler-$VERSION_DASHED           \
+    cuda-cupti-dev-$VERSION_DASHED          \
+    cuda-minimal-build-$VERSION_DASHED      \
+    cuda-nvml-dev-$VERSION_DASHED           \
+    cuda-nvtx-$VERSION_DASHED               \
+    libcufft-dev-$VERSION_DASHED            \
+    libcurand-dev-$VERSION_DASHED           \
+    libcusparse-dev-$VERSION_DASHED
+
+sudo apt-get install -y --no-install-recommends libnvjitlink-dev-$VERSION_DASHED || true
+
+sudo ln -s cuda-$VERSION_DOTTED /usr/local/cuda
@@ -1,3 +1,63 @@
+# 25.09
+
+ ## Highlights:
+
+  * amrex::callNoinline: Call given function without inline (#4606)
+
+    This new function can be used to force noinlining functions. It can be
+    useful when it's necessary to reduce GPU kernel sizes.
+
+  * Incorrect cross term sum for 2D EB terms in MLEBTensor (#4626)
+
+    This fixes a very old bug in the computation of the cross terms of the
+    Navier-Stokes stress tensor in the 2D EB case. The bug affected AMReX
+    based codes that use the MLEBTensor operator in linear solvers.
+
+  * Add ParallelForOMP (#4595)
+    Fix ParallelForOMP introduced in #4595 (#4604)
+
+    amrex::ParallelFor in CPU builds does not spawn OpenMP threads, because
+    it is usually used inside coarse-grained OpenMP regions launched at the
+    MFIter level. However, in some cases, the users may need to start OpenMP
+    parallel regions in the loops over cells. This new function has been
+    added for this purpose.
+
+  * Add ToString function for array and tuple (#4584)
+
+    This adds a `ToString` function that can be used with scalars, arrays
+    and tuples to write error messages or when debugging.
+
+ ## Other major changes:
+
+  * Delay some synchronize calls in addParticles (#4623)
+    Fix a bug in #4623 (#4631)
+
+  * Fix Bug in FaceConservativeLinear (#4630)
+
+  * Generalize the average_*_to_cellcenter routines to take IntVect (#4627)
+
+  * Fix atomicSetID wrapper access (#4625)
+
+  * Fallback to default `CUDA_ARCH` if `nvidia-smi` fails (#4624)
+
+  * Enable OpenMP in addParticles (#4615)
+
+  * Fix potential false sharing in ReduceOps.eval with OMP (#4618)
+
+  * Enable zero-sized Type in TypeMultiplier (#4617)
+
+  * Add ParmParse::getPrefix (#4612)
+
+  * SIMD: Portable Masks, C++20 (#4607)
+
+  * Improve support for Particles with PolymorphicArenaAllocator (#4603)
+
+  * Improvements to AMREX_ASSERT (#4581)
+
+  * SIMD: Remove Unnecessary Namespaces (#4600)
+
+  * Modify how we interpolate velocity from faces to particle position (#4598)
+
 # 25.08
 
  ## Highlights:
 
@@ -300,6 +300,8 @@ To open a plotfile (for example, you could run the
    In the latter case, ParaView will load the plotfiles as a time series.
    ParaView will ask you about the file type -- choose "AMReX/BoxLib Grid Reader" or
    "AMReX/BoxLib Particles Reader".
+   Note that if your plotfile prefix is not ``plt`` or any other type supported by default,
+   then in ``Files of type`` you need to first select ``All files (*)``.
 
 #. Under the "Cell Arrays" field, select a variable (e.g., "phi") and click
    "Apply". Note that the default number of refinement levels loaded and visualized is 1.
@@ -367,6 +369,8 @@ save this script. Then run the bash script by executing the following command in
     bash write_series_file.sh
 
 This will generate a file ``plot_files.series`` which indexes the time variable based on the order of the plotfile numbers.
+Note that if your plotfile prefix is not ``plt``, you can manually edit
+``write_series_file.sh`` accordingly.
 
 To make a ``.series`` file which reads the time out of the plotfile header, use :download:`write_series_file_timestamp.sh </Visualization/write_series_file_timestamp.sh>`.
 
 
@@ -283,6 +283,60 @@ face_linear_interp_z (int i, int j, int k, int n, amrex::Array4<amrex::Real> con
     }
 }
 
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE // Value for x-index i, linearly interpolated along x from two entries of `fine`.
+Real face_linear_interp_safe_x (int i, int j, int k, int n, // (i,j,k) index and component n to evaluate
+                                amrex::Array4<amrex::Real> const& fine, // only read here; the result is returned, not stored
+                                IntVect const& ratio) noexcept // only ratio[0] is used
+{
+    const int ci = amrex::coarsen(i,ratio[0]); // i coarsened by the x ratio
+
+    if (i-ci*ratio[0] != 0) // i is not at the ratio-aligned index ci*ratio[0]
+    {
+        Real const w = static_cast<Real>(i-ci*ratio[0]) * (Real(1.)/Real(ratio[0])); // offset of i from i1 as a fraction of ratio[0]
+        int i1 = ci*ratio[0]; // ratio-aligned index for ci
+        int i2 = (ci+1)*ratio[0]; // ratio-aligned index for ci+1
+        return (Real(1.)-w) * fine(i1,j,k,n) + w * fine(i2,j,k,n); // weighted average of the two aligned entries
+    } else {
+        return fine(i,j,k,n); // i is ratio-aligned: return the stored value unchanged
+    }
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE // Value for y-index j, linearly interpolated along y from two entries of `fine`.
+Real face_linear_interp_safe_y (int i, int j, int k, int n, // (i,j,k) index and component n to evaluate
+                                amrex::Array4<amrex::Real> const& fine, // only read here; the result is returned, not stored
+                                IntVect const& ratio) noexcept // only ratio[1] is used
+{
+    const int cj = amrex::coarsen(j,ratio[1]); // j coarsened by the y ratio
+
+    if (j-cj*ratio[1] != 0) // j is not at the ratio-aligned index cj*ratio[1]
+    {
+        Real const w = static_cast<Real>(j-cj*ratio[1]) * (Real(1.)/Real(ratio[1])); // offset of j from j1 as a fraction of ratio[1]
+        int j1 = cj*ratio[1]; // ratio-aligned index for cj
+        int j2 = (cj+1)*ratio[1]; // ratio-aligned index for cj+1
+        return (Real(1.)-w) * fine(i,j1,k,n) + w * fine(i,j2,k,n); // weighted average of the two aligned entries
+    } else {
+        return fine(i,j,k,n); // j is ratio-aligned: return the stored value unchanged
+    }
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE // Value for z-index k, linearly interpolated along z from two entries of `fine`.
+Real face_linear_interp_safe_z (int i, int j, int k, int n, // (i,j,k) index and component n to evaluate
+                                amrex::Array4<amrex::Real> const& fine, // only read here; the result is returned, not stored
+                                IntVect const& ratio) noexcept // only ratio[2] is used
+{
+    const int ck = amrex::coarsen(k,ratio[2]); // k coarsened by the z ratio
+
+    if (k-ck*ratio[2] != 0) // k is not at the ratio-aligned index ck*ratio[2]
+    {
+        Real const w = static_cast<Real>(k-ck*ratio[2]) * (Real(1.)/Real(ratio[2])); // offset of k from k1 as a fraction of ratio[2]
+        int k1 = ck*ratio[2]; // ratio-aligned index for ck
+        int k2 = (ck+1)*ratio[2]; // ratio-aligned index for ck+1
+        return (Real(1.)-w) * fine(i,j,k1,n) + w * fine(i,j,k2,n); // weighted average of the two aligned entries
+    } else {
+        return fine(i,j,k,n); // k is ratio-aligned: return the stored value unchanged
+    }
+}
+
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
 void cell_quartic_interp_x (int i, int j, int k, int n, Array4<Real> const& fine,
                             Array4<Real const> const& crse) noexcept
 
@@ -516,6 +516,24 @@ FaceConservativeLinear::interp_face (const FArrayBox&       crse,
         }
     }
 
+    bool is_safe = true;
+    FArrayBox safe_fine;
+    Array4<Real> safe_fine_arr = fine_arr;
+    Box safe_fine_region = fine_region;
+    int facedir = 0;
+    for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
+        if (fine_region.type(idim) == IndexType::NODE) { facedir = idim; }
+    }
+    IntVect rrtmp(1);
+    rrtmp[facedir] = ratio[facedir];
+    if (! safe_fine_region.coarsenable(rrtmp)) {
+        is_safe = false;
+        safe_fine_region.coarsen(rrtmp);
+        safe_fine_region.refine(rrtmp);
+        safe_fine.resize(safe_fine_region, ncomp, The_Async_Arena());
+        safe_fine_arr = safe_fine.array();
+    }
+
     //
     // Fill fine ghost faces with interpolation of coarse data that is conservative linear
     //      in the tangential direction.
@@ -525,25 +543,25 @@ FaceConservativeLinear::interp_face (const FArrayBox&       crse,
     //
     if (fine_region.type(0) == IndexType::NODE)
     {
-        AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
+        AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,safe_fine_region,ncomp,i,j,k,n,
         {
-            face_cons_linear_face_interp(i,j,k,n,fine_arr,crse_arr,mask_arr,ratio,per_grown_domain,0);
+            face_cons_linear_face_interp(i,j,k,n,safe_fine_arr,crse_arr,mask_arr,ratio,per_grown_domain,0);
         });
     }
 #if (AMREX_SPACEDIM >= 2)
     else if (fine_region.type(1) == IndexType::NODE)
     {
-        AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
+        AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,safe_fine_region,ncomp,i,j,k,n,
         {
-            face_cons_linear_face_interp(i,j,k,n,fine_arr,crse_arr,mask_arr,ratio,per_grown_domain,1);
+            face_cons_linear_face_interp(i,j,k,n,safe_fine_arr,crse_arr,mask_arr,ratio,per_grown_domain,1);
         });
     }
 #if (AMREX_SPACEDIM == 3)
     else
     {
-        AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
+        AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,safe_fine_region,ncomp,i,j,k,n,
         {
-            face_cons_linear_face_interp(i,j,k,n,fine_arr,crse_arr,mask_arr,ratio,per_grown_domain,2);
+            face_cons_linear_face_interp(i,j,k,n,safe_fine_arr,crse_arr,mask_arr,ratio,per_grown_domain,2);
         });
     }
 #endif
@@ -556,26 +574,47 @@ FaceConservativeLinear::interp_face (const FArrayBox&       crse,
     //
     if (fine_region.type(0) == IndexType::NODE)
     {
-        AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
-        {
-            face_linear_interp_x(i,j,k,n,fine_arr,ratio);
-        });
+        if (is_safe) {
+            AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
+            {
+                face_linear_interp_x(i,j,k,n,fine_arr,ratio);
+            });
+        } else {
+            AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
+            {
+                fine_arr(i,j,k,n) = face_linear_interp_safe_x(i,j,k,n,safe_fine_arr,ratio);
+            });
+        }
     }
 #if (AMREX_SPACEDIM >= 2)
     else if (fine_region.type(1) == IndexType::NODE)
     {
-        AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
-        {
-            face_linear_interp_y(i,j,k,n,fine_arr,ratio);
-        });
+        if (is_safe) {
+            AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
+            {
+                face_linear_interp_y(i,j,k,n,fine_arr,ratio);
+            });
+        } else {
+            AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
+            {
+                fine_arr(i,j,k,n) = face_linear_interp_safe_y(i,j,k,n,safe_fine_arr,ratio);
+            });
+        }
     }
 #if (AMREX_SPACEDIM == 3)
     else
     {
-        AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
-        {
-            face_linear_interp_z(i,j,k,n,fine_arr,ratio);
-        });
+        if (is_safe) {
+            AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
+            {
+                face_linear_interp_z(i,j,k,n,fine_arr,ratio);
+            });
+        } else {
+            AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
+            {
+                fine_arr(i,j,k,n) = face_linear_interp_safe_z(i,j,k,n,safe_fine_arr,ratio);
+            });
+        }
     }
 #endif
 #endif
 
@@ -744,6 +744,19 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse,
 #endif
 #endif
 
+        if (system::verbose > 0) {
+#if defined(HYPRE_DEVELOP_STRING) && defined(HYPRE_BRANCH_NAME)
+            amrex::Print() << "HYPRE (" << HYPRE_DEVELOP_STRING
+                           << " - " << HYPRE_BRANCH_NAME
+                           << " branch) initialized" << '\n';
+
+#elif defined(HYPRE_DEVELOP_STRING) && !defined(HYPRE_BRANCH_NAME)
+            amrex::Print() << "HYPRE (" << HYPRE_DEVELOP_STRING << ") initialized" << '\n';
+
+#elif defined(HYPRE_RELEASE_VERSION)
+            amrex::Print() << "HYPRE (" << HYPRE_RELEASE_VERSION << ") initialized" << '\n';
+#endif
+        }
     }
 #endif