Skip to content

Commit 606c97e

Browse files
committed
Merge branch 'development' into ApplyBCTagsMLEBABecLap
2 parents 098316d + 92e9993 commit 606c97e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1135
-201
lines changed

.github/workflows/cuda.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -185,15 +185,15 @@ jobs:
185185
186186
# Build 3D libamrex cuda build with configure
187187
configure-3d-cuda:
188-
name: CUDA@12.6 [configure 3D]
188+
name: CUDA@13.0 [configure 3D]
189189
runs-on: ubuntu-24.04
190190
needs: check_changes
191191
if: needs.check_changes.outputs.has_non_docs_changes == 'true'
192192
steps:
193193
- uses: actions/checkout@v5
194194
- name: Dependencies
195195
run: |
196-
.github/workflows/dependencies/dependencies_nvcc.sh 12.6
196+
.github/workflows/dependencies/dependencies_nvcc_2404.sh 13.0
197197
.github/workflows/dependencies/dependencies_ccache.sh
198198
- name: Set Up Cache
199199
uses: actions/cache@v4
@@ -215,7 +215,7 @@ jobs:
215215
# /home/runner/work/amrex/amrex/Src/Base/AMReX_GpuLaunchGlobal.H:16:41: error: unused parameter ‘f0’ [-Werror=unused-parameter]
216216
# 16 | AMREX_GPU_GLOBAL void launch_global (L f0) { f0(); }
217217
#
218-
make -j4 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS="-fno-operator-names -Wno-unused-parameter" CCACHE=ccache CUDA_ARCH="7.0 7.2"
218+
make -j4 WARN_ALL=TRUE WARN_ERROR=TRUE XTRA_CXXFLAGS="-fno-operator-names -Wno-unused-parameter" CCACHE=ccache CUDA_ARCH="8.0 9.0"
219219
make install
220220
221221
ccache -s
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#!/usr/bin/env bash
# Installs cmake from the Kitware APT repository for the Ubuntu release
# named in /etc/os-release, then prints the resulting cmake version.
2+
3+
set -eu -o pipefail
4+
5+
# `man apt.conf`:
6+
# Number of retries to perform. If this is non-zero APT will retry
7+
# failed files the given number of times.
8+
echo 'Acquire::Retries "3";' | sudo tee /etc/apt/apt.conf.d/80-retries
9+
10+
# Download the Kitware signing key and store it de-armored under
# /usr/share/keyrings, unless the kitware-archive-keyring copyright file
# already exists (presumably meaning that package is already installed).
test -f /usr/share/doc/kitware-archive-keyring/copyright ||
11+
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
12+
13+
# Fetch the apt.llvm.org snapshot key only if it is not present yet.
if [[ ! -f /etc/apt/trusted.gpg.d/apt.llvm.org.asc ]]; then
14+
wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc
15+
fi
16+
17+
source /etc/os-release # set UBUNTU_CODENAME
18+
19+
# Register the Kitware repository for this Ubuntu codename, verified with
# the keyring file written above.
echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ ${UBUNTU_CODENAME} main" | sudo tee /etc/apt/sources.list.d/kitware.list >/dev/null
20+
21+
sudo apt-get update
22+
23+
# Remove the manually downloaded key (same guard as above) before the
# kitware-archive-keyring package is installed; presumably so that the
# package-managed keyring file replaces it without a conflict.
test -f /usr/share/doc/kitware-archive-keyring/copyright ||
24+
sudo rm /usr/share/keyrings/kitware-archive-keyring.gpg
25+
26+
sudo apt-get install kitware-archive-keyring
27+
28+
sudo apt-get install -y --no-install-recommends cmake
29+
30+
# Delete any cmake in /usr/local/bin (no error if absent); presumably so the
# APT-installed cmake is the one found on PATH -- confirm for the CI image.
sudo rm -f /usr/local/bin/cmake
31+
cmake --version

.github/workflows/dependencies/dependencies_nvcc.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ sudo apt-get install -y \
2525
wget
2626

2727
VERSION_DOTTED=${1-12.0} && VERSION_DASHED=$(sed 's/\./-/' <<< $VERSION_DOTTED)
28-
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb
28+
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb
2929
sudo dpkg -i cuda-keyring_1.0-1_all.deb
3030
sudo apt-get update
3131
sudo apt-get install -y \
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Copyright 2020-2022 Axel Huebl
4+
#
5+
# License: BSD-3-Clause-LBNL
#
# Installs build prerequisites and a set of CUDA toolkit packages from
# NVIDIA's ubuntu2404 APT repository.
#
# Usage: <this script> [CUDA_VERSION]
#   CUDA_VERSION  dotted toolkit version (e.g. 13.0); defaults to 12.0.
6+
7+
set -eu -o pipefail
8+
9+
# `man apt.conf`:
10+
# Number of retries to perform. If this is non-zero APT will retry
11+
# failed files the given number of times.
12+
echo 'Acquire::Retries "3";' | sudo tee /etc/apt/apt.conf.d/80-retries
13+
14+
sudo apt-get -qqq update
15+
sudo apt-get install -y \
16+
build-essential \
17+
ca-certificates \
18+
cmake \
19+
g++ \
20+
gfortran \
21+
gnupg \
22+
libopenmpi-dev \
23+
openmpi-bin \
24+
pkg-config \
25+
wget
26+
27+
# VERSION_DASHED is VERSION_DOTTED with its first '.' replaced by '-'; it is
# used below as the suffix of the CUDA package names.
VERSION_DOTTED=${1-12.0} && VERSION_DASHED=$(sed 's/\./-/' <<< $VERSION_DOTTED)
28+
# Install NVIDIA's cuda-keyring package, then refresh the package index.
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
29+
sudo dpkg -i cuda-keyring_1.1-1_all.deb
30+
sudo apt-get update
31+
sudo apt-get install -y \
32+
cuda-command-line-tools-$VERSION_DASHED \
33+
cuda-compiler-$VERSION_DASHED \
34+
cuda-cupti-dev-$VERSION_DASHED \
35+
cuda-minimal-build-$VERSION_DASHED \
36+
cuda-nvml-dev-$VERSION_DASHED \
37+
cuda-nvtx-$VERSION_DASHED \
38+
libcufft-dev-$VERSION_DASHED \
39+
libcurand-dev-$VERSION_DASHED \
40+
libcusparse-dev-$VERSION_DASHED
41+
42+
# Best effort: `|| true` keeps `set -e` from aborting the script if this
# package cannot be installed (presumably it is not available for every
# CUDA version -- confirm).
sudo apt-get install -y --no-install-recommends libnvjitlink-dev-$VERSION_DASHED || true
43+
44+
# Create /usr/local/cuda as a symlink with the relative target
# cuda-$VERSION_DOTTED, i.e. it resolves to /usr/local/cuda-<version>.
sudo ln -s cuda-$VERSION_DOTTED /usr/local/cuda

CHANGES

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,63 @@
1+
# 25.09
2+
3+
## Highlights:
4+
5+
* amrex::callNoinline: Call given function without inline (#4606)
6+
7+
This new function can be used to force noinlining functions. It can be
8+
useful when it's necessary to reduce GPU kernel sizes.
9+
10+
* Incorrect cross term sum for 2D EB terms in MLEBTensor (#4626)
11+
12+
This fixes a very old bug in the computation of the cross terms of the
13+
Navier-Stokes stress tensor in the 2D EB case. The bug affected AMReX
14+
based codes that use the MLEBTensor operator in linear solvers.
15+
16+
* Add ParallelForOMP (#4595)
17+
Fix ParallelForOMP introduced in #4595 (#4604)
18+
19+
amrex::ParallelFor in CPU builds does not spawn OpenMP threads, because
20+
it is usually used inside coarse-grained OpenMP regions launched at the
21+
MFIter level. However, in some cases, the users may need to start OpenMP
22+
parallel regions in the loops over cells. This new function has been
23+
added for this purpose.
24+
25+
* Add ToString function for array and tuple (#4584)
26+
27+
This adds a `ToString` function that can be used with scalars, arrays
28+
and tuples to write error messages or when debugging.
29+
30+
## Other major changes:
31+
32+
* Delay some synchronize calls in addParticles (#4623)
33+
Fix a bug in #4623 (#4631)
34+
35+
* Fix Bug in FaceConservativeLinear (#4630)
36+
37+
* Generalize the average_*_to_cellcenter routines to take IntVect (#4627)
38+
39+
* Fix atomicSetID wrapper access (#4625)
40+
41+
* Fallback to default `CUDA_ARCH` if `nvidia-smi` fails (#4624)
42+
43+
* Enable OpenMP in addParticles (#4615)
44+
45+
* Fix potential false sharing in ReduceOps.eval with OMP (#4618)
46+
47+
* Enable zero-sized Type in TypeMultiplier (#4617)
48+
49+
* Add ParamParse::getPrefix (#4612)
50+
51+
* SIMD: Portable Masks, C++20 (#4607)
52+
53+
* Improve support for Particles with PolymorphicArenaAllocator (#4603)
54+
55+
* Improvements to AMREX_ASSERT (#4581)
56+
57+
* SIMD: Remove Unnecessary Namespaces (#4600)
58+
59+
* Modify how we interpolate velocity from faces to particle position (#4598)
60+
161
# 25.08
262

363
## Highlights:

Docs/sphinx_documentation/source/Visualization.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,8 @@ To open a plotfile (for example, you could run the
300300
In the latter case, ParaView will load the plotfiles as a time series.
301301
ParaView will ask you about the file type -- choose "AMReX/BoxLib Grid Reader" or
302302
"AMReX/BoxLib Particles Reader".
303+
Note that if your plotfile prefix is not ``plt`` or another prefix supported by default,
304+
then in ``Files of type`` you need to first select ``All files (*)``.
303305

304306
#. Under the "Cell Arrays" field, select a variable (e.g., "phi") and click
305307
"Apply". Note that the default number of refinement levels loaded and visualized is 1.
@@ -367,6 +369,8 @@ save this script. Then run the bash script by executing the following command in
367369
bash write_series_file.sh
368370

369371
This will generate a file ``plot_files.series`` which indexes the time variable based on the order of the plotfile numbers.
372+
Note that if your plotfile prefix is not ``plt``, you can manually edit
373+
``write_series_file.sh`` accordingly.
370374

371375
To make a ``.series`` file which reads the time out of the plotfile header, use :download:`write_series_file_timestamp.sh </Visualization/write_series_file_timestamp.sh>`.
372376

Src/AmrCore/AMReX_Interp_C.H

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,60 @@ face_linear_interp_z (int i, int j, int k, int n, amrex::Array4<amrex::Real> con
283283
}
284284
}
285285

286+
// Returns the value for component n at index (i,j,k), linearly interpolated
// along x between the two nearest indices that are multiples of ratio[0].
// Only reads from `fine`; nothing is written, the result is returned.
//
//   i, j, k, n : index and component to evaluate
//   fine       : array the values are read from
//   ratio      : per-direction ratio; only ratio[0] is used here
//
// If i is itself a multiple of ratio[0], fine(i,j,k,n) is returned as is.
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
287+
Real face_linear_interp_safe_x (int i, int j, int k, int n,
288+
amrex::Array4<amrex::Real> const& fine,
289+
IntVect const& ratio) noexcept
290+
{
291+
const int ci = amrex::coarsen(i,ratio[0]);
292+
293+
// A non-zero remainder means i lies strictly between two multiples of ratio[0].
if (i-ci*ratio[0] != 0)
294+
{
295+
// w: offset of i from ci*ratio[0], as a fraction of ratio[0].
Real const w = static_cast<Real>(i-ci*ratio[0]) * (Real(1.)/Real(ratio[0]));
296+
// i1, i2: consecutive multiples of ratio[0] used as interpolation end points.
int i1 = ci*ratio[0];
297+
int i2 = (ci+1)*ratio[0];
298+
return (Real(1.)-w) * fine(i1,j,k,n) + w * fine(i2,j,k,n);
299+
} else {
300+
return fine(i,j,k,n);
301+
}
302+
}
303+
304+
// Returns the value for component n at index (i,j,k), linearly interpolated
// along y between the two nearest indices that are multiples of ratio[1].
// Only reads from `fine`; nothing is written, the result is returned.
//
//   i, j, k, n : index and component to evaluate
//   fine       : array the values are read from
//   ratio      : per-direction ratio; only ratio[1] is used here
//
// If j is itself a multiple of ratio[1], fine(i,j,k,n) is returned as is.
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
305+
Real face_linear_interp_safe_y (int i, int j, int k, int n,
306+
amrex::Array4<amrex::Real> const& fine,
307+
IntVect const& ratio) noexcept
308+
{
309+
const int cj = amrex::coarsen(j,ratio[1]);
310+
311+
// A non-zero remainder means j lies strictly between two multiples of ratio[1].
if (j-cj*ratio[1] != 0)
312+
{
313+
// w: offset of j from cj*ratio[1], as a fraction of ratio[1].
Real const w = static_cast<Real>(j-cj*ratio[1]) * (Real(1.)/Real(ratio[1]));
314+
// j1, j2: consecutive multiples of ratio[1] used as interpolation end points.
int j1 = cj*ratio[1];
315+
int j2 = (cj+1)*ratio[1];
316+
return (Real(1.)-w) * fine(i,j1,k,n) + w * fine(i,j2,k,n);
317+
} else {
318+
return fine(i,j,k,n);
319+
}
320+
}
321+
322+
// Returns the value for component n at index (i,j,k), linearly interpolated
// along z between the two nearest indices that are multiples of ratio[2].
// Only reads from `fine`; nothing is written, the result is returned.
//
//   i, j, k, n : index and component to evaluate
//   fine       : array the values are read from
//   ratio      : per-direction ratio; only ratio[2] is used here
//
// If k is itself a multiple of ratio[2], fine(i,j,k,n) is returned as is.
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
323+
Real face_linear_interp_safe_z (int i, int j, int k, int n,
324+
amrex::Array4<amrex::Real> const& fine,
325+
IntVect const& ratio) noexcept
326+
{
327+
const int ck = amrex::coarsen(k,ratio[2]);
328+
329+
// A non-zero remainder means k lies strictly between two multiples of ratio[2].
if (k-ck*ratio[2] != 0)
330+
{
331+
// w: offset of k from ck*ratio[2], as a fraction of ratio[2].
Real const w = static_cast<Real>(k-ck*ratio[2]) * (Real(1.)/Real(ratio[2]));
332+
// k1, k2: consecutive multiples of ratio[2] used as interpolation end points.
int k1 = ck*ratio[2];
333+
int k2 = (ck+1)*ratio[2];
334+
return (Real(1.)-w) * fine(i,j,k1,n) + w * fine(i,j,k2,n);
335+
} else {
336+
return fine(i,j,k,n);
337+
}
338+
}
339+
286340
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
287341
void cell_quartic_interp_x (int i, int j, int k, int n, Array4<Real> const& fine,
288342
Array4<Real const> const& crse) noexcept

Src/AmrCore/AMReX_Interpolater.cpp

Lines changed: 57 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,24 @@ FaceConservativeLinear::interp_face (const FArrayBox& crse,
516516
}
517517
}
518518

519+
bool is_safe = true;
520+
FArrayBox safe_fine;
521+
Array4<Real> safe_fine_arr = fine_arr;
522+
Box safe_fine_region = fine_region;
523+
int facedir = 0;
524+
for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
525+
if (fine_region.type(idim) == IndexType::NODE) { facedir = idim; }
526+
}
527+
IntVect rrtmp(1);
528+
rrtmp[facedir] = ratio[facedir];
529+
if (! safe_fine_region.coarsenable(rrtmp)) {
530+
is_safe = false;
531+
safe_fine_region.coarsen(rrtmp);
532+
safe_fine_region.refine(rrtmp);
533+
safe_fine.resize(safe_fine_region, ncomp, The_Async_Arena());
534+
safe_fine_arr = safe_fine.array();
535+
}
536+
519537
//
520538
// Fill fine ghost faces with interpolation of coarse data that is conservative linear
521539
// in the tangential direction.
@@ -525,25 +543,25 @@ FaceConservativeLinear::interp_face (const FArrayBox& crse,
525543
//
526544
if (fine_region.type(0) == IndexType::NODE)
527545
{
528-
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
546+
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,safe_fine_region,ncomp,i,j,k,n,
529547
{
530-
face_cons_linear_face_interp(i,j,k,n,fine_arr,crse_arr,mask_arr,ratio,per_grown_domain,0);
548+
face_cons_linear_face_interp(i,j,k,n,safe_fine_arr,crse_arr,mask_arr,ratio,per_grown_domain,0);
531549
});
532550
}
533551
#if (AMREX_SPACEDIM >= 2)
534552
else if (fine_region.type(1) == IndexType::NODE)
535553
{
536-
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
554+
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,safe_fine_region,ncomp,i,j,k,n,
537555
{
538-
face_cons_linear_face_interp(i,j,k,n,fine_arr,crse_arr,mask_arr,ratio,per_grown_domain,1);
556+
face_cons_linear_face_interp(i,j,k,n,safe_fine_arr,crse_arr,mask_arr,ratio,per_grown_domain,1);
539557
});
540558
}
541559
#if (AMREX_SPACEDIM == 3)
542560
else
543561
{
544-
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
562+
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,safe_fine_region,ncomp,i,j,k,n,
545563
{
546-
face_cons_linear_face_interp(i,j,k,n,fine_arr,crse_arr,mask_arr,ratio,per_grown_domain,2);
564+
face_cons_linear_face_interp(i,j,k,n,safe_fine_arr,crse_arr,mask_arr,ratio,per_grown_domain,2);
547565
});
548566
}
549567
#endif
@@ -556,26 +574,47 @@ FaceConservativeLinear::interp_face (const FArrayBox& crse,
556574
//
557575
if (fine_region.type(0) == IndexType::NODE)
558576
{
559-
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
560-
{
561-
face_linear_interp_x(i,j,k,n,fine_arr,ratio);
562-
});
577+
if (is_safe) {
578+
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
579+
{
580+
face_linear_interp_x(i,j,k,n,fine_arr,ratio);
581+
});
582+
} else {
583+
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
584+
{
585+
fine_arr(i,j,k,n) = face_linear_interp_safe_x(i,j,k,n,safe_fine_arr,ratio);
586+
});
587+
}
563588
}
564589
#if (AMREX_SPACEDIM >= 2)
565590
else if (fine_region.type(1) == IndexType::NODE)
566591
{
567-
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
568-
{
569-
face_linear_interp_y(i,j,k,n,fine_arr,ratio);
570-
});
592+
if (is_safe) {
593+
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
594+
{
595+
face_linear_interp_y(i,j,k,n,fine_arr,ratio);
596+
});
597+
} else {
598+
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
599+
{
600+
fine_arr(i,j,k,n) = face_linear_interp_safe_y(i,j,k,n,safe_fine_arr,ratio);
601+
});
602+
}
571603
}
572604
#if (AMREX_SPACEDIM == 3)
573605
else
574606
{
575-
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
576-
{
577-
face_linear_interp_z(i,j,k,n,fine_arr,ratio);
578-
});
607+
if (is_safe) {
608+
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
609+
{
610+
face_linear_interp_z(i,j,k,n,fine_arr,ratio);
611+
});
612+
} else {
613+
AMREX_HOST_DEVICE_PARALLEL_FOR_4D_FLAG(runon,fine_region,ncomp,i,j,k,n,
614+
{
615+
fine_arr(i,j,k,n) = face_linear_interp_safe_z(i,j,k,n,safe_fine_arr,ratio);
616+
});
617+
}
579618
}
580619
#endif
581620
#endif

Src/Base/AMReX.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,19 @@ amrex::Initialize (int& argc, char**& argv, bool build_parm_parse,
744744
#endif
745745
#endif
746746

747+
if (system::verbose > 0) {
748+
#if defined(HYPRE_DEVELOP_STRING) && defined(HYPRE_BRANCH_NAME)
749+
amrex::Print() << "HYPRE (" << HYPRE_DEVELOP_STRING
750+
<< " - " << HYPRE_BRANCH_NAME
751+
<< " branch) initialized" << '\n';
752+
753+
#elif defined(HYPRE_DEVELOP_STRING) && !defined(HYPRE_BRANCH_NAME)
754+
amrex::Print() << "HYPRE (" << HYPRE_DEVELOP_STRING << ") initialized" << '\n';
755+
756+
#elif defined(HYPRE_RELEASE_VERSION)
757+
amrex::Print() << "HYPRE (" << HYPRE_RELEASE_VERSION << ") initialized" << '\n';
758+
#endif
759+
}
747760
}
748761
#endif
749762

0 commit comments

Comments
 (0)