Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Src/Base/AMReX_MultiFabUtil.H
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,18 @@ namespace amrex
void computeGradient (MultiFab& grad, const Array<MultiFab const*,AMREX_SPACEDIM>& umac,
const Geometry& geom);

#if (AMREX_SPACEDIM == 3)
//! Computes curl of face-centered vector field and stores result at nodes
void computeCurlNodal (MultiFab& curl_nodal,
const Array<MultiFab const*,AMREX_SPACEDIM>& face_vec,
const Geometry& geom);

//! Computes curl of edge-centered vector field and stores result at faces
void computeCurlFace (Array<MultiFab*,AMREX_SPACEDIM>& curl_face,
const Array<MultiFab const*,AMREX_SPACEDIM>& edge_vec,
const Geometry& geom);
#endif

//! Convert iMultiFab to MultiFab
MultiFab ToMultiFab (const iMultiFab& imf);
//! Convert iMultiFab to Long
Expand Down
127 changes: 127 additions & 0 deletions Src/Base/AMReX_MultiFabUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,133 @@ namespace amrex
}
}

#if (AMREX_SPACEDIM == 3)
void computeCurlNodal (MultiFab& curl_nodal,
const Array<MultiFab const*,AMREX_SPACEDIM>& face_vec,
const Geometry& geom)
{
AMREX_ASSERT(curl_nodal.nComp() >= AMREX_SPACEDIM);
AMREX_ASSERT(curl_nodal.is_nodal());
AMREX_ASSERT(face_vec[0]->nComp() == 1);
AMREX_ASSERT(face_vec[1]->nComp() == 1);
AMREX_ASSERT(face_vec[2]->nComp() == 1);
AMREX_ALWAYS_ASSERT_WITH_MESSAGE(face_vec[0]->nGrowVect().allGE(1),
"computeCurlNodal requires x-face inputs with at least one ghost cell");
AMREX_ALWAYS_ASSERT_WITH_MESSAGE(face_vec[1]->nGrowVect().allGE(1),
"computeCurlNodal requires y-face inputs with at least one ghost cell");
AMREX_ALWAYS_ASSERT_WITH_MESSAGE(face_vec[2]->nGrowVect().allGE(1),
"computeCurlNodal requires z-face inputs with at least one ghost cell");

const GpuArray<Real,AMREX_SPACEDIM> dxinv = geom.InvCellSizeArray();

#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
for (MFIter mfi(curl_nodal,TilingIfNotGPU()); mfi.isValid(); ++mfi)
{
const Box& nbx = mfi.tilebox();
Array4<Real> const& curlarr = curl_nodal.array(mfi);
Array4<Real const> const& uarr = face_vec[0]->const_array(mfi);
Array4<Real const> const& varr = face_vec[1]->const_array(mfi);
Array4<Real const> const& warr = face_vec[2]->const_array(mfi);

AMREX_HOST_DEVICE_PARALLEL_FOR_3D(nbx, i, j, k,
{
// For node (i,j,k), compute curl using face values that touch this node
// curl_x = dw/dy - dv/dz
curlarr(i,j,k,0) = dxinv[1] * (warr(i-1,j,k) - warr(i-1,j-1,k))
- dxinv[2] * (varr(i-1,j-1,k) - varr(i-1,j-1,k-1));

// curl_y = du/dz - dw/dx
curlarr(i,j,k,1) = dxinv[2] * (uarr(i-1,j-1,k) - uarr(i-1,j-1,k-1))
- dxinv[0] * (warr(i,j-1,k) - warr(i-1,j-1,k));

// curl_z = dv/dx - du/dy
curlarr(i,j,k,2) = dxinv[0] * (varr(i,j-1,k-1) - varr(i-1,j-1,k-1))
- dxinv[1] * (uarr(i-1,j,k-1) - uarr(i-1,j-1,k-1));
});
}
}

void computeCurlFace (Array<MultiFab*,AMREX_SPACEDIM>& curl_face,
const Array<MultiFab const*,AMREX_SPACEDIM>& edge_vec,
const Geometry& geom)
{
// edge_vec contains edge-centered components
// curl_face will contain face-centered curl components
AMREX_ASSERT(edge_vec[0]->nComp() == 1); // x-edges
AMREX_ASSERT(edge_vec[1]->nComp() == 1); // y-edges
AMREX_ASSERT(edge_vec[2]->nComp() == 1); // z-edges
AMREX_ASSERT(curl_face[0]->nComp() == 1); // x-faces
AMREX_ASSERT(curl_face[1]->nComp() == 1); // y-faces
AMREX_ASSERT(curl_face[2]->nComp() == 1); // z-faces

const GpuArray<Real,AMREX_SPACEDIM> dxinv = geom.InvCellSizeArray();

#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
for (MFIter mfi(*curl_face[0],TilingIfNotGPU()); mfi.isValid(); ++mfi)
{
// Get face-centered boxes for each component of curl
const Box& xfbx = mfi.tilebox(IntVect(AMREX_D_DECL(1,0,0))); // x-face
const Box& yfbx = mfi.tilebox(IntVect(AMREX_D_DECL(0,1,0))); // y-face
const Box& zfbx = mfi.tilebox(IntVect(AMREX_D_DECL(0,0,1))); // z-face

Array4<Real> const& curl_x = curl_face[0]->array(mfi);
Array4<Real> const& curl_y = curl_face[1]->array(mfi);
Array4<Real> const& curl_z = curl_face[2]->array(mfi);

// edge_vec components:
// edge_vec[0] is on x-edges (j,k are nodal, i is cell-centered)
// edge_vec[1] is on y-edges (i,k are nodal, j is cell-centered)
// edge_vec[2] is on z-edges (i,j are nodal, k is cell-centered)
Array4<Real const> const& Ex = edge_vec[0]->const_array(mfi);
Array4<Real const> const& Ey = edge_vec[1]->const_array(mfi);
Array4<Real const> const& Ez = edge_vec[2]->const_array(mfi);

// Compute x-component of curl on x-faces
// curl_x = dEz/dy - dEy/dz at x-faces
AMREX_HOST_DEVICE_PARALLEL_FOR_3D(xfbx, i, j, k,
{
// Ez values at (i,j,k) and (i,j+1,k) for dEz/dy
Real dEz_dy = dxinv[1] * (Ez(i,j+1,k) - Ez(i,j,k));

// Ey values at (i,j,k) and (i,j,k+1) for dEy/dz
Real dEy_dz = dxinv[2] * (Ey(i,j,k+1) - Ey(i,j,k));

curl_x(i,j,k) = dEz_dy - dEy_dz;
});

// Compute y-component of curl on y-faces
// curl_y = dEx/dz - dEz/dx at y-faces
AMREX_HOST_DEVICE_PARALLEL_FOR_3D(yfbx, i, j, k,
{
// Ex values at (i,j,k) and (i,j,k+1) for dEx/dz
Real dEx_dz = dxinv[2] * (Ex(i,j,k+1) - Ex(i,j,k));

// Ez values at (i,j,k) and (i+1,j,k) for dEz/dx
Real dEz_dx = dxinv[0] * (Ez(i+1,j,k) - Ez(i,j,k));

curl_y(i,j,k) = dEx_dz - dEz_dx;
});

// Compute z-component of curl on z-faces
// curl_z = dEy/dx - dEx/dy at z-faces
AMREX_HOST_DEVICE_PARALLEL_FOR_3D(zfbx, i, j, k,
{
// Ey values at (i,j,k) and (i+1,j,k) for dEy/dx
Real dEy_dx = dxinv[0] * (Ey(i+1,j,k) - Ey(i,j,k));

// Ex values at (i,j,k) and (i,j+1,k) for dEx/dy
Real dEx_dy = dxinv[1] * (Ex(i,j+1,k) - Ex(i,j,k));

curl_z(i,j,k) = dEy_dx - dEx_dy;
});
}
}
#endif

MultiFab periodicShift (MultiFab const& mf, IntVect const& offset,
Periodicity const& period)
{
Expand Down
99 changes: 90 additions & 9 deletions Src/Boundary/AMReX_YAFluxRegister.H
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,6 @@ YAFluxRegisterT<MF>::FineAdd (const MFIter& mfi,
int fluxcomp = srccomp;
std::array<FAB const*,AMREX_SPACEDIM> flux{{AMREX_D_DECL(a_flux[0],a_flux[1],a_flux[2])}};
bool use_gpu = (runon == RunOn::Gpu) && Gpu::inLaunchRegion();
amrex::ignore_unused(use_gpu);
std::array<FAB,AMREX_SPACEDIM> ftmp;
if (fbx != tbx) {
AMREX_ASSERT(!use_gpu);
Expand Down Expand Up @@ -478,10 +477,51 @@ YAFluxRegisterT<MF>::FineAdd (const MFIter& mfi,
auto dtdxs = dtdx[idim];
int dirside = idim*2+side;
Array4<T const> farr = f->const_array(fluxcomp);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(runon, lobx_is, tmpbox,
{
yafluxreg_fineadd(tmpbox, d, farr, dtdxs, numcomp, dirside, rr);
});
if (use_gpu) {
#if (AMREX_SPACEDIM == 3)
amrex::ParallelFor(lobx_is, numcomp,
[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
{
yafluxreg_fineadd(i,j,k,n,d,farr,dtdxs,dirside,rr);
});
#elif (AMREX_SPACEDIM == 2)
amrex::ParallelFor(lobx_is, numcomp,
[=] AMREX_GPU_DEVICE (int i, int j, int n) noexcept
{
yafluxreg_fineadd(i,j,n,d,farr,dtdxs,dirside,rr);
});
#else
amrex::ParallelFor(lobx_is, numcomp,
[=] AMREX_GPU_DEVICE (int i, int n) noexcept
{
yafluxreg_fineadd(i,n,d,farr,dtdxs,dirside,rr);
});
#endif
} else {
const auto lo = amrex::lbound(lobx_is);
const auto hi = amrex::ubound(lobx_is);
for (int n = 0; n < numcomp; ++n) {
#if (AMREX_SPACEDIM == 3)
for (int k = lo.z; k <= hi.z; ++k) {
for (int j = lo.y; j <= hi.y; ++j) {
for (int i = lo.x; i <= hi.x; ++i) {
yafluxreg_fineadd(i,j,k,n,d,farr,dtdxs,dirside,rr);
}
}
}
#elif (AMREX_SPACEDIM == 2)
for (int j = lo.y; j <= hi.y; ++j) {
for (int i = lo.x; i <= hi.x; ++i) {
yafluxreg_fineadd(i,j,n,d,farr,dtdxs,dirside,rr);
}
}
#else
for (int i = lo.x; i <= hi.x; ++i) {
yafluxreg_fineadd(i,n,d,farr,dtdxs,dirside,rr);
}
#endif
}
}
}
}
{
Expand All @@ -493,10 +533,51 @@ YAFluxRegisterT<MF>::FineAdd (const MFIter& mfi,
auto dtdxs = dtdx[idim];
int dirside = idim*2+side;
Array4<T const> farr = f->const_array(fluxcomp);
AMREX_LAUNCH_HOST_DEVICE_LAMBDA_FLAG(runon, hibx_is, tmpbox,
{
yafluxreg_fineadd(tmpbox, d, farr, dtdxs, numcomp, dirside, rr);
});
if (use_gpu) {
#if (AMREX_SPACEDIM == 3)
amrex::ParallelFor(hibx_is, numcomp,
[=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
{
yafluxreg_fineadd(i,j,k,n,d,farr,dtdxs,dirside,rr);
});
#elif (AMREX_SPACEDIM == 2)
amrex::ParallelFor(hibx_is, numcomp,
[=] AMREX_GPU_DEVICE (int i, int j, int n) noexcept
{
yafluxreg_fineadd(i,j,n,d,farr,dtdxs,dirside,rr);
});
#else
amrex::ParallelFor(hibx_is, numcomp,
[=] AMREX_GPU_DEVICE (int i, int n) noexcept
{
yafluxreg_fineadd(i,n,d,farr,dtdxs,dirside,rr);
});
#endif
} else {
const auto lo = amrex::lbound(hibx_is);
const auto hi = amrex::ubound(hibx_is);
for (int n = 0; n < numcomp; ++n) {
#if (AMREX_SPACEDIM == 3)
for (int k = lo.z; k <= hi.z; ++k) {
for (int j = lo.y; j <= hi.y; ++j) {
for (int i = lo.x; i <= hi.x; ++i) {
yafluxreg_fineadd(i,j,k,n,d,farr,dtdxs,dirside,rr);
}
}
}
#elif (AMREX_SPACEDIM == 2)
for (int j = lo.y; j <= hi.y; ++j) {
for (int i = lo.x; i <= hi.x; ++i) {
yafluxreg_fineadd(i,j,n,d,farr,dtdxs,dirside,rr);
}
}
#else
for (int i = lo.x; i <= hi.x; ++i) {
yafluxreg_fineadd(i,n,d,farr,dtdxs,dirside,rr);
}
#endif
}
}
}
}
}
Expand Down
41 changes: 15 additions & 26 deletions Src/Boundary/AMReX_YAFluxRegister_1D_K.H
Original file line number Diff line number Diff line change
Expand Up @@ -33,34 +33,23 @@ void yafluxreg_crseadd (Box const& bx, Array4<T> const& d, Array4<int const> con

template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void yafluxreg_fineadd (Box const& bx, Array4<T> const& d, Array4<T const> const& f,
T dtdx, int nc, int dirside, Dim3 const& rr) noexcept
T yafluxreg_fineadd_delta (int i, int n,
Array4<T const> const& f, T scale,
int dirside, Dim3 const& rr) noexcept
{
const auto lo = amrex::lbound(bx);
const int side = dirside % 2;
const int ii = (side == 0) ? (i+1) * rr.x : i * rr.x;
const T sign = (side == 0) ? T(-1.0) : T(1.0);
return sign * scale * f(ii, 0, 0, n);
}

switch (dirside) {
case 0 :
{
for (int n = 0; n < nc; ++n) {
const int i = lo.x;
const int ii = (i+1)*rr.x;
T* AMREX_RESTRICT dp = &(d(i,0,0,n));
T tmp = -dtdx*f(ii,0,0,n);
HostDevice::Atomic::Add(dp, tmp);
}
break;
}
default:
{
for (int n = 0; n < nc; ++n) {
const int i = lo.x;
const int ii = i*rr.x;
T* AMREX_RESTRICT dp = &(d(i,0,0,n));
T tmp = dtdx*f(ii,0,0,n);
HostDevice::Atomic::Add(dp, tmp);
}
}
}
template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void yafluxreg_fineadd (int i, int n,
Array4<T> const& d, Array4<T const> const& f,
T scale, int dirside, Dim3 const& rr) noexcept
{
d(i,0,0,n) += yafluxreg_fineadd_delta(i,n,f,scale,dirside,rr);
}

}
Expand Down
Loading
Loading