Skip to content

Commit 098316d

Browse files
committed
Updated applyBC to use parallelFor on tags for GPU builds
1 parent 04afe76 commit 098316d

File tree

3 files changed

+439
-67
lines changed

3 files changed

+439
-67
lines changed

Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.H

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,26 @@ protected:
164164
[[nodiscard]] bool supportRobinBC () const noexcept override { return true; }
165165
};
166166

167+
// Tag for applyBC EB version
168+
template <typename T>
169+
struct MLMGABCEBTag {
170+
amrex::Array4<T> fab;
171+
amrex::Array4<T const> bcval;
172+
amrex::Array4<int const> mask;
173+
amrex::Array4<T const> area;
174+
T bcloc;
175+
amrex::Box bx;
176+
amrex::BoundCond bctype;
177+
int blen;
178+
int comp;
179+
int dir;
180+
int is_eb;
181+
int side;
182+
183+
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
184+
amrex::Box const& box() const noexcept { return bx; }
185+
};
186+
167187
}
168188

169189
#endif

Src/LinearSolvers/MLMG/AMReX_MLEBABecLap.cpp

Lines changed: 206 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,10 +1049,13 @@ MLEBABecLap::applyBC (int amrlev, int mglev, MultiFab& in, BCMode bc_mode, State
10491049

10501050
FArrayBox foofab(Box::TheUnitBox(),ncomp);
10511051
const auto& foo = foofab.array();
1052-
1052+
#ifdef AMREX_USE_GPU
1053+
Vector<MLMGABCEBTag<RT>> ebtags;
1054+
ebtags.reserve(in.local_size() * 2 * AMREX_SPACEDIM*ncomp);
1055+
bool run_on_gpu = Gpu::inLaunchRegion();
1056+
#endif
10531057
MFItInfo mfi_info;
10541058
if (Gpu::notInLaunchRegion()) { mfi_info.SetDynamic(true); }
1055-
10561059
#ifdef AMREX_USE_OMP
10571060
#pragma omp parallel if (Gpu::notInLaunchRegion())
10581061
#endif
@@ -1090,89 +1093,225 @@ MLEBABecLap::applyBC (int amrlev, int mglev, MultiFab& in, BCMode bc_mode, State
10901093
if (fabtyp == FabType::regular)
10911094
{
10921095
if (idim == 0) {
1093-
AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1094-
blo, tboxlo, {
1095-
mllinop_apply_bc_x(0, tboxlo, blen, iofab, mlo,
1096-
bctlo, bcllo, bvlo,
1097-
imaxorder, dxi, flagbc, icomp);
1098-
},
1099-
bhi, tboxhi, {
1100-
mllinop_apply_bc_x(1, tboxhi, blen, iofab, mhi,
1101-
bcthi, bclhi, bvhi,
1102-
imaxorder, dxi, flagbc, icomp);
1103-
});
1104-
} else if (idim == 1) {
1105-
AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1106-
blo, tboxlo, {
1107-
mllinop_apply_bc_y(0, tboxlo, blen, iofab, mlo,
1108-
bctlo, bcllo, bvlo,
1109-
imaxorder, dyi, flagbc, icomp);
1110-
},
1111-
bhi, tboxhi, {
1112-
mllinop_apply_bc_y(1, tboxhi, blen, iofab, mhi,
1113-
bcthi, bclhi, bvhi,
1114-
imaxorder, dyi, flagbc, icomp);
1115-
});
1116-
} else {
1117-
AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1118-
blo, tboxlo, {
1119-
mllinop_apply_bc_z(0, tboxlo, blen, iofab, mlo,
1120-
bctlo, bcllo, bvlo,
1121-
imaxorder, dzi, flagbc, icomp);
1122-
},
1123-
bhi, tboxhi, {
1124-
mllinop_apply_bc_z(1, tboxhi, blen, iofab, mhi,
1125-
bcthi, bclhi, bvhi,
1126-
imaxorder, dzi, flagbc, icomp);
1127-
});
1128-
}
1129-
}
1130-
else // irregular
1131-
{
1132-
const auto& ap = area[idim]->const_array(mfi);
1133-
const auto& mask = ccmask.const_array(mfi);
1134-
if (idim == 0) {
1135-
AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1136-
blo, tboxlo, {
1137-
mlebabeclap_apply_bc_x(0, tboxlo, blen, iofab, mask, ap,
1096+
#ifdef AMREX_USE_GPU
1097+
if (run_on_gpu) {
1098+
ebtags.emplace_back(MLMGABCEBTag<RT>{
1099+
iofab,bvlo, mlo, foo,
1100+
bcllo, blo, bctlo, blen,
1101+
icomp, 0, 0, 0
1102+
});
1103+
ebtags.emplace_back(MLMGABCEBTag<RT>{
1104+
iofab,bvhi, mhi, foo,
1105+
bclhi, bhi, bcthi, blen,
1106+
icomp, 0, 0,1
1107+
});
1108+
} else
1109+
#endif
1110+
{
1111+
AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1112+
blo, tboxlo, {
1113+
mllinop_apply_bc_x(0, tboxlo, blen, iofab, mlo,
11381114
bctlo, bcllo, bvlo,
11391115
imaxorder, dxi, flagbc, icomp);
1140-
},
1141-
bhi, tboxhi, {
1142-
mlebabeclap_apply_bc_x(1, tboxhi, blen, iofab, mask, ap,
1116+
},
1117+
bhi, tboxhi, {
1118+
mllinop_apply_bc_x(1, tboxhi, blen, iofab, mhi,
11431119
bcthi, bclhi, bvhi,
11441120
imaxorder, dxi, flagbc, icomp);
1145-
});
1146-
} else if (idim == 1) {
1147-
AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1148-
blo, tboxlo, {
1149-
mlebabeclap_apply_bc_y(0, tboxlo, blen, iofab, mask, ap,
1121+
});
1122+
}
1123+
} else if (idim == 1) {
1124+
#ifdef AMREX_USE_GPU
1125+
if (run_on_gpu) {
1126+
ebtags.emplace_back(MLMGABCEBTag<RT>{
1127+
iofab,bvlo, mlo, foo,
1128+
bcllo, blo, bctlo, blen,
1129+
icomp,1, 0, 0
1130+
});
1131+
ebtags.emplace_back(MLMGABCEBTag<RT>{
1132+
iofab,bvhi, mhi, foo,
1133+
bclhi, bhi, bcthi, blen,
1134+
icomp, 1, 0,1
1135+
});
1136+
} else
1137+
#endif
1138+
{
1139+
AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1140+
blo, tboxlo, {
1141+
mllinop_apply_bc_y(0, tboxlo, blen, iofab, mlo,
11501142
bctlo, bcllo, bvlo,
11511143
imaxorder, dyi, flagbc, icomp);
1152-
},
1153-
bhi, tboxhi, {
1154-
mlebabeclap_apply_bc_y(1, tboxhi, blen, iofab, mask, ap,
1144+
},
1145+
bhi, tboxhi, {
1146+
mllinop_apply_bc_y(1, tboxhi, blen, iofab, mhi,
11551147
bcthi, bclhi, bvhi,
11561148
imaxorder, dyi, flagbc, icomp);
1157-
});
1158-
} else {
1159-
AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1160-
blo, tboxlo, {
1161-
mlebabeclap_apply_bc_z(0, tboxlo, blen, iofab, mask, ap,
1149+
});
1150+
}
1151+
} else {
1152+
#ifdef AMREX_USE_GPU
1153+
if (run_on_gpu) {
1154+
ebtags.emplace_back(MLMGABCEBTag<RT>{
1155+
iofab,bvlo, mlo, foo,
1156+
bcllo, blo, bctlo, blen,
1157+
icomp, 2, 0, 0
1158+
});
1159+
ebtags.emplace_back(MLMGABCEBTag<RT>{
1160+
iofab,bvhi, mhi, foo,
1161+
bclhi, bhi, bcthi, blen,
1162+
icomp, 2, 0,1
1163+
});
1164+
} else
1165+
#endif
1166+
{
1167+
AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1168+
blo, tboxlo, {
1169+
mllinop_apply_bc_z(0, tboxlo, blen, iofab, mlo,
11621170
bctlo, bcllo, bvlo,
11631171
imaxorder, dzi, flagbc, icomp);
1164-
},
1165-
bhi, tboxhi, {
1166-
mlebabeclap_apply_bc_z(1, tboxhi, blen, iofab, mask, ap,
1172+
},
1173+
bhi, tboxhi, {
1174+
mllinop_apply_bc_z(1, tboxhi, blen, iofab, mhi,
11671175
bcthi, bclhi, bvhi,
11681176
imaxorder, dzi, flagbc, icomp);
1169-
});
1177+
});
1178+
}
11701179
}
11711180
}
1181+
else // irregular
1182+
{
1183+
const auto& ap = area[idim]->const_array(mfi);
1184+
const auto& mask = ccmask.const_array(mfi);
1185+
if (idim == 0) {
1186+
#ifdef AMREX_USE_GPU
1187+
if (run_on_gpu) {
1188+
ebtags.emplace_back(MLMGABCEBTag<RT>{
1189+
iofab,bvlo, mask, ap,
1190+
bcllo, blo, bctlo, blen,
1191+
icomp, 0, 1, 0
1192+
});
1193+
ebtags.emplace_back(MLMGABCEBTag<RT>{
1194+
iofab,bvhi, mask, ap,
1195+
bclhi, bhi, bcthi, blen,
1196+
icomp, 0, 1, 1
1197+
});
1198+
} else
1199+
#endif
1200+
{
1201+
AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1202+
blo, tboxlo, {
1203+
mlebabeclap_apply_bc_x(0, tboxlo, blen, iofab, mask, ap,
1204+
bctlo, bcllo, bvlo,
1205+
imaxorder, dxi, flagbc, icomp);
1206+
},
1207+
bhi, tboxhi, {
1208+
mlebabeclap_apply_bc_x(1, tboxhi, blen, iofab, mask, ap,
1209+
bcthi, bclhi, bvhi,
1210+
imaxorder, dxi, flagbc, icomp);
1211+
});
1212+
}
1213+
} else if (idim == 1) {
1214+
#ifdef AMREX_USE_GPU
1215+
if (run_on_gpu) {
1216+
ebtags.emplace_back(MLMGABCEBTag<RT>{
1217+
iofab,bvlo, mask, ap,
1218+
bcllo, blo, bctlo, blen,
1219+
icomp, 0, 1, 0
1220+
});
1221+
ebtags.emplace_back(MLMGABCEBTag<RT>{
1222+
iofab,bvhi, mask, ap,
1223+
bclhi, bhi, bcthi, blen,
1224+
icomp, 0, 1, 1
1225+
});
1226+
} else
1227+
#endif
1228+
{
1229+
AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1230+
blo, tboxlo, {
1231+
mlebabeclap_apply_bc_y(0, tboxlo, blen, iofab, mask, ap,
1232+
bctlo, bcllo, bvlo,
1233+
imaxorder, dyi, flagbc, icomp);
1234+
},
1235+
bhi, tboxhi, {
1236+
mlebabeclap_apply_bc_y(1, tboxhi, blen, iofab, mask, ap,
1237+
bcthi, bclhi, bvhi,
1238+
imaxorder, dyi, flagbc, icomp);
1239+
});
1240+
}
1241+
} else {
1242+
#ifdef AMREX_USE_GPU
1243+
if (run_on_gpu) {
1244+
ebtags.emplace_back(MLMGABCEBTag<RT>{
1245+
iofab,bvlo, mask, ap,
1246+
bcllo, blo, bctlo, blen,
1247+
icomp, 0, 1, 0
1248+
});
1249+
ebtags.emplace_back(MLMGABCEBTag<RT>{
1250+
iofab,bvhi, mask, ap,
1251+
bclhi, bhi, bcthi, blen,
1252+
icomp, 0, 1, 1
1253+
});
1254+
} else
1255+
#endif
1256+
{
1257+
AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1258+
blo, tboxlo, {
1259+
mlebabeclap_apply_bc_z(0, tboxlo, blen, iofab, mask, ap,
1260+
bctlo, bcllo, bvlo,
1261+
imaxorder, dzi, flagbc, icomp);
1262+
},
1263+
bhi, tboxhi, {
1264+
mlebabeclap_apply_bc_z(1, tboxhi, blen, iofab, mask, ap,
1265+
bcthi, bclhi, bvhi,
1266+
imaxorder, dzi, flagbc, icomp);
1267+
});
1268+
}
1269+
}
1270+
}
11721271
}
11731272
}
11741273
}
11751274
}
1275+
1276+
#ifdef AMREX_USE_GPU
1277+
amrex::ParallelFor(
1278+
ebtags, [=] AMREX_GPU_DEVICE (int i, int j, int k, MLMGABCEBTag<RT> const& tag) noexcept
1279+
{
1280+
if (tag.is_eb == 0) {
1281+
if (tag.dir == 0) {
1282+
mllinop_apply_bc_x(tag.side, i, j, k, tag.blen, tag.fab,
1283+
tag.mask, tag.bctype, tag.bcloc, tag.bcval, imaxorder, dxi, flagbc,
1284+
tag.comp);
1285+
} else if (tag.dir == 1) {
1286+
mllinop_apply_bc_y(tag.side, i, j, k, tag.blen, tag.fab,
1287+
tag.mask, tag.bctype, tag.bcloc, tag.bcval, imaxorder, dyi, flagbc,
1288+
tag.comp);
1289+
}
1290+
#if (AMREX_SPACEDIM == 3)
1291+
else if (tag.dir == 3) {
1292+
mllinop_apply_bc_z(tag.side, i, j, k, tag.blen, tag.fab,
1293+
tag.mask, tag.bctype, tag.bcloc, tag.bcval, imaxorder, dzi, flagbc,
1294+
tag.comp);
1295+
}
1296+
#endif
1297+
} else {
1298+
if (tag.dir == 0) {
1299+
mlebabeclap_apply_bc_x(tag.side, i, j, k, tag.blen, tag.fab, tag.mask, tag.area,
1300+
tag.bctype, tag.bcloc, tag.bcval, imaxorder, dxi, flagbc, tag.comp);
1301+
} else if (tag.dir == 1) {
1302+
mlebabeclap_apply_bc_y(tag.side, i, j, k, tag.blen, tag.fab, tag.mask, tag.area,
1303+
tag.bctype, tag.bcloc, tag.bcval, imaxorder, dyi, flagbc, tag.comp);
1304+
}
1305+
#if (AMREX_SPACEDIM == 3)
1306+
else if (tag.dir == 3) {
1307+
mlebabeclap_apply_bc_z(tag.side, i, j, k, tag.blen, tag.fab, tag.mask, tag.area,
1308+
tag.bctype, tag.bcloc, tag.bcval, imaxorder, dzi, flagbc, tag.comp);
1309+
}
1310+
#endif
1311+
}
1312+
}
1313+
);
1314+
#endif
11761315
}
11771316

11781317
void

0 commit comments

Comments
 (0)