@@ -1049,10 +1049,13 @@ MLEBABecLap::applyBC (int amrlev, int mglev, MultiFab& in, BCMode bc_mode, State
1049
1049
1050
1050
FArrayBox foofab (Box::TheUnitBox (),ncomp);
1051
1051
const auto & foo = foofab.array ();
1052
-
1052
+ #ifdef AMREX_USE_GPU
1053
+ Vector<MLMGABCEBTag<RT>> ebtags;
1054
+ ebtags.reserve (in.local_size () * 2 * AMREX_SPACEDIM*ncomp);
1055
+ bool run_on_gpu = Gpu::inLaunchRegion ();
1056
+ #endif
1053
1057
MFItInfo mfi_info;
1054
1058
if (Gpu::notInLaunchRegion ()) { mfi_info.SetDynamic (true ); }
1055
-
1056
1059
#ifdef AMREX_USE_OMP
1057
1060
#pragma omp parallel if (Gpu::notInLaunchRegion())
1058
1061
#endif
@@ -1090,89 +1093,225 @@ MLEBABecLap::applyBC (int amrlev, int mglev, MultiFab& in, BCMode bc_mode, State
1090
1093
if (fabtyp == FabType::regular)
1091
1094
{
1092
1095
if (idim == 0 ) {
1093
- AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1094
- blo, tboxlo, {
1095
- mllinop_apply_bc_x (0 , tboxlo, blen, iofab, mlo,
1096
- bctlo, bcllo, bvlo,
1097
- imaxorder, dxi, flagbc, icomp);
1098
- },
1099
- bhi, tboxhi, {
1100
- mllinop_apply_bc_x (1 , tboxhi, blen, iofab, mhi,
1101
- bcthi, bclhi, bvhi,
1102
- imaxorder, dxi, flagbc, icomp);
1103
- });
1104
- } else if (idim == 1 ) {
1105
- AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1106
- blo, tboxlo, {
1107
- mllinop_apply_bc_y (0 , tboxlo, blen, iofab, mlo,
1108
- bctlo, bcllo, bvlo,
1109
- imaxorder, dyi, flagbc, icomp);
1110
- },
1111
- bhi, tboxhi, {
1112
- mllinop_apply_bc_y (1 , tboxhi, blen, iofab, mhi,
1113
- bcthi, bclhi, bvhi,
1114
- imaxorder, dyi, flagbc, icomp);
1115
- });
1116
- } else {
1117
- AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1118
- blo, tboxlo, {
1119
- mllinop_apply_bc_z (0 , tboxlo, blen, iofab, mlo,
1120
- bctlo, bcllo, bvlo,
1121
- imaxorder, dzi, flagbc, icomp);
1122
- },
1123
- bhi, tboxhi, {
1124
- mllinop_apply_bc_z (1 , tboxhi, blen, iofab, mhi,
1125
- bcthi, bclhi, bvhi,
1126
- imaxorder, dzi, flagbc, icomp);
1127
- });
1128
- }
1129
- }
1130
- else // irregular
1131
- {
1132
- const auto & ap = area[idim]->const_array (mfi);
1133
- const auto & mask = ccmask.const_array (mfi);
1134
- if (idim == 0 ) {
1135
- AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1136
- blo, tboxlo, {
1137
- mlebabeclap_apply_bc_x (0 , tboxlo, blen, iofab, mask, ap,
1096
+ #ifdef AMREX_USE_GPU
1097
+ if (run_on_gpu) {
1098
+ ebtags.emplace_back (MLMGABCEBTag<RT>{
1099
+ iofab,bvlo, mlo, foo,
1100
+ bcllo, blo, bctlo, blen,
1101
+ icomp, 0 , 0 , 0
1102
+ });
1103
+ ebtags.emplace_back (MLMGABCEBTag<RT>{
1104
+ iofab,bvhi, mhi, foo,
1105
+ bclhi, bhi, bcthi, blen,
1106
+ icomp, 0 , 0 ,1
1107
+ });
1108
+ } else
1109
+ #endif
1110
+ {
1111
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1112
+ blo, tboxlo, {
1113
+ mllinop_apply_bc_x (0 , tboxlo, blen, iofab, mlo,
1138
1114
bctlo, bcllo, bvlo,
1139
1115
imaxorder, dxi, flagbc, icomp);
1140
- },
1141
- bhi, tboxhi, {
1142
- mlebabeclap_apply_bc_x (1 , tboxhi, blen, iofab, mask, ap ,
1116
+ },
1117
+ bhi, tboxhi, {
1118
+ mllinop_apply_bc_x (1 , tboxhi, blen, iofab, mhi ,
1143
1119
bcthi, bclhi, bvhi,
1144
1120
imaxorder, dxi, flagbc, icomp);
1145
- });
1146
- } else if (idim == 1 ) {
1147
- AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1148
- blo, tboxlo, {
1149
- mlebabeclap_apply_bc_y (0 , tboxlo, blen, iofab, mask, ap,
1121
+ });
1122
+ }
1123
+ } else if (idim == 1 ) {
1124
+ #ifdef AMREX_USE_GPU
1125
+ if (run_on_gpu) {
1126
+ ebtags.emplace_back (MLMGABCEBTag<RT>{
1127
+ iofab,bvlo, mlo, foo,
1128
+ bcllo, blo, bctlo, blen,
1129
+ icomp,1 , 0 , 0
1130
+ });
1131
+ ebtags.emplace_back (MLMGABCEBTag<RT>{
1132
+ iofab,bvhi, mhi, foo,
1133
+ bclhi, bhi, bcthi, blen,
1134
+ icomp, 1 , 0 ,1
1135
+ });
1136
+ } else
1137
+ #endif
1138
+ {
1139
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1140
+ blo, tboxlo, {
1141
+ mllinop_apply_bc_y (0 , tboxlo, blen, iofab, mlo,
1150
1142
bctlo, bcllo, bvlo,
1151
1143
imaxorder, dyi, flagbc, icomp);
1152
- },
1153
- bhi, tboxhi, {
1154
- mlebabeclap_apply_bc_y (1 , tboxhi, blen, iofab, mask, ap ,
1144
+ },
1145
+ bhi, tboxhi, {
1146
+ mllinop_apply_bc_y (1 , tboxhi, blen, iofab, mhi ,
1155
1147
bcthi, bclhi, bvhi,
1156
1148
imaxorder, dyi, flagbc, icomp);
1157
- });
1158
- } else {
1159
- AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1160
- blo, tboxlo, {
1161
- mlebabeclap_apply_bc_z (0 , tboxlo, blen, iofab, mask, ap,
1149
+ });
1150
+ }
1151
+ } else {
1152
+ #ifdef AMREX_USE_GPU
1153
+ if (run_on_gpu) {
1154
+ ebtags.emplace_back (MLMGABCEBTag<RT>{
1155
+ iofab,bvlo, mlo, foo,
1156
+ bcllo, blo, bctlo, blen,
1157
+ icomp, 2 , 0 , 0
1158
+ });
1159
+ ebtags.emplace_back (MLMGABCEBTag<RT>{
1160
+ iofab,bvhi, mhi, foo,
1161
+ bclhi, bhi, bcthi, blen,
1162
+ icomp, 2 , 0 ,1
1163
+ });
1164
+ } else
1165
+ #endif
1166
+ {
1167
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1168
+ blo, tboxlo, {
1169
+ mllinop_apply_bc_z (0 , tboxlo, blen, iofab, mlo,
1162
1170
bctlo, bcllo, bvlo,
1163
1171
imaxorder, dzi, flagbc, icomp);
1164
- },
1165
- bhi, tboxhi, {
1166
- mlebabeclap_apply_bc_z (1 , tboxhi, blen, iofab, mask, ap ,
1172
+ },
1173
+ bhi, tboxhi, {
1174
+ mllinop_apply_bc_z (1 , tboxhi, blen, iofab, mhi ,
1167
1175
bcthi, bclhi, bvhi,
1168
1176
imaxorder, dzi, flagbc, icomp);
1169
- });
1177
+ });
1178
+ }
1170
1179
}
1171
1180
}
1181
+ else // irregular
1182
+ {
1183
+ const auto & ap = area[idim]->const_array (mfi);
1184
+ const auto & mask = ccmask.const_array (mfi);
1185
+ if (idim == 0 ) {
1186
+ #ifdef AMREX_USE_GPU
1187
+ if (run_on_gpu) {
1188
+ ebtags.emplace_back (MLMGABCEBTag<RT>{
1189
+ iofab,bvlo, mask, ap,
1190
+ bcllo, blo, bctlo, blen,
1191
+ icomp, 0 , 1 , 0
1192
+ });
1193
+ ebtags.emplace_back (MLMGABCEBTag<RT>{
1194
+ iofab,bvhi, mask, ap,
1195
+ bclhi, bhi, bcthi, blen,
1196
+ icomp, 0 , 1 , 1
1197
+ });
1198
+ } else
1199
+ #endif
1200
+ {
1201
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1202
+ blo, tboxlo, {
1203
+ mlebabeclap_apply_bc_x (0 , tboxlo, blen, iofab, mask, ap,
1204
+ bctlo, bcllo, bvlo,
1205
+ imaxorder, dxi, flagbc, icomp);
1206
+ },
1207
+ bhi, tboxhi, {
1208
+ mlebabeclap_apply_bc_x (1 , tboxhi, blen, iofab, mask, ap,
1209
+ bcthi, bclhi, bvhi,
1210
+ imaxorder, dxi, flagbc, icomp);
1211
+ });
1212
+ }
1213
+ } else if (idim == 1 ) {
1214
+ #ifdef AMREX_USE_GPU
1215
+ if (run_on_gpu) {
1216
+ ebtags.emplace_back (MLMGABCEBTag<RT>{
1217
+ iofab,bvlo, mask, ap,
1218
+ bcllo, blo, bctlo, blen,
1219
+ icomp, 0 , 1 , 0
1220
+ });
1221
+ ebtags.emplace_back (MLMGABCEBTag<RT>{
1222
+ iofab,bvhi, mask, ap,
1223
+ bclhi, bhi, bcthi, blen,
1224
+ icomp, 0 , 1 , 1
1225
+ });
1226
+ } else
1227
+ #endif
1228
+ {
1229
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1230
+ blo, tboxlo, {
1231
+ mlebabeclap_apply_bc_y (0 , tboxlo, blen, iofab, mask, ap,
1232
+ bctlo, bcllo, bvlo,
1233
+ imaxorder, dyi, flagbc, icomp);
1234
+ },
1235
+ bhi, tboxhi, {
1236
+ mlebabeclap_apply_bc_y (1 , tboxhi, blen, iofab, mask, ap,
1237
+ bcthi, bclhi, bvhi,
1238
+ imaxorder, dyi, flagbc, icomp);
1239
+ });
1240
+ }
1241
+ } else {
1242
+ #ifdef AMREX_USE_GPU
1243
+ if (run_on_gpu) {
1244
+ ebtags.emplace_back (MLMGABCEBTag<RT>{
1245
+ iofab,bvlo, mask, ap,
1246
+ bcllo, blo, bctlo, blen,
1247
+ icomp, 0 , 1 , 0
1248
+ });
1249
+ ebtags.emplace_back (MLMGABCEBTag<RT>{
1250
+ iofab,bvhi, mask, ap,
1251
+ bclhi, bhi, bcthi, blen,
1252
+ icomp, 0 , 1 , 1
1253
+ });
1254
+ } else
1255
+ #endif
1256
+ {
1257
+ AMREX_LAUNCH_HOST_DEVICE_LAMBDA (
1258
+ blo, tboxlo, {
1259
+ mlebabeclap_apply_bc_z (0 , tboxlo, blen, iofab, mask, ap,
1260
+ bctlo, bcllo, bvlo,
1261
+ imaxorder, dzi, flagbc, icomp);
1262
+ },
1263
+ bhi, tboxhi, {
1264
+ mlebabeclap_apply_bc_z (1 , tboxhi, blen, iofab, mask, ap,
1265
+ bcthi, bclhi, bvhi,
1266
+ imaxorder, dzi, flagbc, icomp);
1267
+ });
1268
+ }
1269
+ }
1270
+ }
1172
1271
}
1173
1272
}
1174
1273
}
1175
1274
}
1275
+
1276
+ #ifdef AMREX_USE_GPU
1277
+ amrex::ParallelFor (
1278
+ ebtags, [=] AMREX_GPU_DEVICE (int i, int j, int k, MLMGABCEBTag<RT> const & tag) noexcept
1279
+ {
1280
+ if (tag.is_eb == 0 ) {
1281
+ if (tag.dir == 0 ) {
1282
+ mllinop_apply_bc_x (tag.side , i, j, k, tag.blen , tag.fab ,
1283
+ tag.mask , tag.bctype , tag.bcloc , tag.bcval , imaxorder, dxi, flagbc,
1284
+ tag.comp );
1285
+ } else if (tag.dir == 1 ) {
1286
+ mllinop_apply_bc_y (tag.side , i, j, k, tag.blen , tag.fab ,
1287
+ tag.mask , tag.bctype , tag.bcloc , tag.bcval , imaxorder, dyi, flagbc,
1288
+ tag.comp );
1289
+ }
1290
+ #if (AMREX_SPACEDIM == 3)
1291
+ else if (tag.dir == 3 ) {
1292
+ mllinop_apply_bc_z (tag.side , i, j, k, tag.blen , tag.fab ,
1293
+ tag.mask , tag.bctype , tag.bcloc , tag.bcval , imaxorder, dzi, flagbc,
1294
+ tag.comp );
1295
+ }
1296
+ #endif
1297
+ } else {
1298
+ if (tag.dir == 0 ) {
1299
+ mlebabeclap_apply_bc_x (tag.side , i, j, k, tag.blen , tag.fab , tag.mask , tag.area ,
1300
+ tag.bctype , tag.bcloc , tag.bcval , imaxorder, dxi, flagbc, tag.comp );
1301
+ } else if (tag.dir == 1 ) {
1302
+ mlebabeclap_apply_bc_y (tag.side , i, j, k, tag.blen , tag.fab , tag.mask , tag.area ,
1303
+ tag.bctype , tag.bcloc , tag.bcval , imaxorder, dyi, flagbc, tag.comp );
1304
+ }
1305
+ #if (AMREX_SPACEDIM == 3)
1306
+ else if (tag.dir == 3 ) {
1307
+ mlebabeclap_apply_bc_z (tag.side , i, j, k, tag.blen , tag.fab , tag.mask , tag.area ,
1308
+ tag.bctype , tag.bcloc , tag.bcval , imaxorder, dzi, flagbc, tag.comp );
1309
+ }
1310
+ #endif
1311
+ }
1312
+ }
1313
+ );
1314
+ #endif
1176
1315
}
1177
1316
1178
1317
void
0 commit comments