Skip to content

Commit 6319d8f

Browse files
committed
view alloc fixes for P3 small kernels
1 parent 68f648a commit 6319d8f

File tree

7 files changed

+312
-117
lines changed

7 files changed

+312
-117
lines changed

components/eamxx/src/physics/p3/disp/p3_main_impl_disp.cpp

Lines changed: 99 additions & 86 deletions
Original file line number | Diff line number | Diff line change
@@ -32,16 +32,16 @@ ::p3_main_init_disp(
3232
const uview_2d<Spack>& qv_supersat_i, const uview_2d<Spack>& qtend_ignore, const uview_2d<Spack>& ntend_ignore, const uview_2d<Spack>& mu_c,
3333
const uview_2d<Spack>& lamc, const uview_2d<Spack>& rho_qi, const uview_2d<Spack>& qv2qi_depos_tend, const uview_2d<Spack>& precip_total_tend,
3434
const uview_2d<Spack>& nevapr, const uview_2d<Spack>& precip_liq_flux, const uview_2d<Spack>& precip_ice_flux)
35-
{
35+
{
3636
using ExeSpace = typename KT::ExeSpace;
3737
const auto policy = ekat::ExeSpaceUtils<ExeSpace>::get_default_team_policy(nj, nk_pack);
3838

3939
Kokkos::parallel_for("p3_main_init",
4040
policy, KOKKOS_LAMBDA(const MemberType& team) {
41-
42-
const Int i = team.league_rank();
43-
precip_liq_surf(i) = 0;
44-
precip_ice_surf(i) = 0;
41+
42+
const Int i = team.league_rank();
43+
precip_liq_surf(i) = 0;
44+
precip_ice_surf(i) = 0;
4545

4646
Kokkos::parallel_for(
4747
Kokkos::TeamVectorRange(team, nk_pack), [&] (Int k) {
@@ -107,6 +107,7 @@ ::p3_main_internal_disp(
107107
const P3Infrastructure& infrastructure,
108108
const P3HistoryOnly& history_only,
109109
const P3LookupTables& lookup_tables,
110+
const P3Temporaries& temporaries,
110111
const WorkspaceManager& workspace_mgr,
111112
Int nj,
112113
Int nk,
@@ -131,87 +132,99 @@ ::p3_main_internal_disp(
131132
view_1d<bool> nucleationPossible("nucleationPossible", nj);
132133
view_1d<bool> hydrometeorsPresent("hydrometeorsPresent", nj);
133134

134-
//
135-
// Create temporary variables needed for p3
136-
//
137-
view_2d<Spack>
138-
mu_r("mu_r", nj, nk_pack), // shape parameter of rain
139-
T_atm("T_atm", nj, nk_pack), // temperature at the beginning of the microphysics step [K]
140-
141-
// 2D size distribution and fallspeed parameters
142-
lamr("lamr", nj, nk_pack), logn0r("logn0r", nj, nk_pack), nu("nu", nj, nk_pack),
143-
cdist("cdist", nj, nk_pack), cdist1("cdist1", nj, nk_pack), cdistr("cdistr", nj, nk_pack),
144-
145-
// Variables needed for in-cloud calculations
146-
// Inverse cloud fractions (1/cld)
147-
inv_cld_frac_i("inv_cld_frac_i", nj, nk_pack), inv_cld_frac_l("inv_cld_frac_l", nj, nk_pack), inv_cld_frac_r("inv_cld_frac_r", nj, nk_pack),
148-
// In cloud mass-mixing ratios
149-
qc_incld("qc_incld", nj, nk_pack), qr_incld("qr_incld", nj, nk_pack), qi_incld("qi_incld", nj, nk_pack), qm_incld("qm_incld", nj, nk_pack),
150-
// In cloud number concentrations
151-
nc_incld("nc_incld", nj, nk_pack), nr_incld("nr_incld", nj, nk_pack), ni_incld("ni_incld", nj, nk_pack), bm_incld("bm_incld", nj, nk_pack),
152-
153-
// Other
154-
inv_dz("inv_dz", nj, nk_pack), inv_rho("inv_rho", nj, nk_pack), ze_ice("ze_ice", nj, nk_pack), ze_rain("ze_rain", nj, nk_pack),
155-
prec("prec", nj, nk_pack), rho("rho", nj, nk_pack), rhofacr("rhofacr", nj, nk_pack), rhofaci("rhofaci", nj, nk_pack),
156-
acn("acn", nj, nk_pack), qv_sat_l("qv_sat", nj, nk_pack), qv_sat_i("qv_sat_i", nj, nk_pack), sup("sup", nj, nk_pack),
157-
qv_supersat_i("qv_supersat", nj, nk_pack), tmparr2("tmparr2", nj, nk_pack), exner("exner", nj, nk_pack),
158-
diag_equiv_reflectivity("diag_equiv_ref", nj, nk_pack), diag_vm_qi("diag_vm_qi", nj, nk_pack), diag_diam_qi("diag_diam_qi", nj, nk_pack),
159-
pratot("pratot", nj, nk_pack), prctot("prctot", nj, nk_pack),
160-
161-
// p3_tend_out, may not need these
162-
qtend_ignore("qtend_ignore", nj, nk_pack), ntend_ignore("ntend_ignore", nj, nk_pack),
163-
164-
// Variables still used in F90 but removed from C++ interface
165-
mu_c("mu_c", nj, nk_pack), lamc("lamc", nj, nk_pack),
166-
qr_evap_tend("qr_evap_tend", nj, nk_pack),
167-
168-
// cloud sedimentation
169-
v_qc("v_qc", nj, nk_pack), v_nc("v_nc", nj, nk_pack), flux_qx("flux_qx", nj, nk_pack), flux_nx("flux_nx", nj, nk_pack),
170-
171-
// ice sedimentation
172-
v_qit("v_qit", nj, nk_pack), v_nit("v_nit", nj, nk_pack), flux_nit("flux_nit", nj, nk_pack), flux_bir("flux_bir", nj, nk_pack),
173-
flux_qir("flux_qir", nj, nk_pack), flux_qit("flux_qit", nj, nk_pack),
174-
175-
// rain sedimentation
176-
v_qr("v_qr", nj, nk_pack), v_nr("v_nr", nj, nk_pack);
177-
178135
// Get views of all inputs
179-
auto pres = diagnostic_inputs.pres;
180-
auto dz = diagnostic_inputs.dz;
181-
auto nc_nuceat_tend = diagnostic_inputs.nc_nuceat_tend;
182-
auto nccn_prescribed = diagnostic_inputs.nccn;
183-
auto ni_activated = diagnostic_inputs.ni_activated;
184-
auto inv_qc_relvar = diagnostic_inputs.inv_qc_relvar;
185-
auto dpres = diagnostic_inputs.dpres;
186-
auto inv_exner = diagnostic_inputs.inv_exner;
187-
auto cld_frac_i = diagnostic_inputs.cld_frac_i;
188-
auto cld_frac_l = diagnostic_inputs.cld_frac_l;
189-
auto cld_frac_r = diagnostic_inputs.cld_frac_r;
190-
auto col_location = infrastructure.col_location;
191-
auto qc = prognostic_state.qc;
192-
auto nc = prognostic_state.nc;
193-
auto qr = prognostic_state.qr;
194-
auto nr = prognostic_state.nr;
195-
auto qi = prognostic_state.qi;
196-
auto qm = prognostic_state.qm;
197-
auto ni = prognostic_state.ni;
198-
auto bm = prognostic_state.bm;
199-
auto qv = prognostic_state.qv;
200-
auto th = prognostic_state.th;
201-
auto diag_eff_radius_qc = diagnostic_outputs.diag_eff_radius_qc;
202-
auto diag_eff_radius_qi = diagnostic_outputs.diag_eff_radius_qi;
203-
auto diag_eff_radius_qr = diagnostic_outputs.diag_eff_radius_qr;
204-
auto qv2qi_depos_tend = diagnostic_outputs.qv2qi_depos_tend;
205-
auto rho_qi = diagnostic_outputs.rho_qi;
206-
auto precip_liq_flux = diagnostic_outputs.precip_liq_flux;
207-
auto precip_ice_flux = diagnostic_outputs.precip_ice_flux;
208-
auto precip_total_tend = diagnostic_outputs.precip_total_tend;
209-
auto nevapr = diagnostic_outputs.nevapr;
210-
auto qv_prev = diagnostic_inputs.qv_prev;
211-
auto t_prev = diagnostic_inputs.t_prev;
212-
auto liq_ice_exchange = history_only.liq_ice_exchange;
213-
auto vap_liq_exchange = history_only.vap_liq_exchange;
214-
auto vap_ice_exchange = history_only.vap_ice_exchange;
136+
auto pres = diagnostic_inputs.pres;
137+
auto dz = diagnostic_inputs.dz;
138+
auto nc_nuceat_tend = diagnostic_inputs.nc_nuceat_tend;
139+
auto nccn_prescribed = diagnostic_inputs.nccn;
140+
auto ni_activated = diagnostic_inputs.ni_activated;
141+
auto inv_qc_relvar = diagnostic_inputs.inv_qc_relvar;
142+
auto dpres = diagnostic_inputs.dpres;
143+
auto inv_exner = diagnostic_inputs.inv_exner;
144+
auto cld_frac_i = diagnostic_inputs.cld_frac_i;
145+
auto cld_frac_l = diagnostic_inputs.cld_frac_l;
146+
auto cld_frac_r = diagnostic_inputs.cld_frac_r;
147+
auto col_location = infrastructure.col_location;
148+
auto qc = prognostic_state.qc;
149+
auto nc = prognostic_state.nc;
150+
auto qr = prognostic_state.qr;
151+
auto nr = prognostic_state.nr;
152+
auto qi = prognostic_state.qi;
153+
auto qm = prognostic_state.qm;
154+
auto ni = prognostic_state.ni;
155+
auto bm = prognostic_state.bm;
156+
auto qv = prognostic_state.qv;
157+
auto th = prognostic_state.th;
158+
auto diag_eff_radius_qc = diagnostic_outputs.diag_eff_radius_qc;
159+
auto diag_eff_radius_qi = diagnostic_outputs.diag_eff_radius_qi;
160+
auto diag_eff_radius_qr = diagnostic_outputs.diag_eff_radius_qr;
161+
auto qv2qi_depos_tend = diagnostic_outputs.qv2qi_depos_tend;
162+
auto rho_qi = diagnostic_outputs.rho_qi;
163+
auto precip_liq_flux = diagnostic_outputs.precip_liq_flux;
164+
auto precip_ice_flux = diagnostic_outputs.precip_ice_flux;
165+
auto precip_total_tend = diagnostic_outputs.precip_total_tend;
166+
auto nevapr = diagnostic_outputs.nevapr;
167+
auto qv_prev = diagnostic_inputs.qv_prev;
168+
auto t_prev = diagnostic_inputs.t_prev;
169+
auto liq_ice_exchange = history_only.liq_ice_exchange;
170+
auto vap_liq_exchange = history_only.vap_liq_exchange;
171+
auto vap_ice_exchange = history_only.vap_ice_exchange;
172+
auto mu_r = temporaries.mu_r;
173+
auto T_atm = temporaries.T_atm;
174+
auto lamr = temporaries.lamr;
175+
auto logn0r = temporaries.logn0r;
176+
auto nu = temporaries.nu;
177+
auto cdist = temporaries.cdist;
178+
auto cdist1 = temporaries.cdist1;
179+
auto cdistr = temporaries.cdistr;
180+
auto inv_cld_frac_i = temporaries.inv_cld_frac_i;
181+
auto inv_cld_frac_l = temporaries.inv_cld_frac_l;
182+
auto inv_cld_frac_r = temporaries.inv_cld_frac_r;
183+
auto qc_incld = temporaries.qc_incld;
184+
auto qr_incld = temporaries.qr_incld;
185+
auto qi_incld = temporaries.qi_incld;
186+
auto qm_incld = temporaries.qm_incld;
187+
auto nc_incld = temporaries.nc_incld;
188+
auto nr_incld = temporaries.nr_incld;
189+
auto ni_incld = temporaries.ni_incld;
190+
auto bm_incld = temporaries.bm_incld;
191+
auto inv_dz = temporaries.inv_dz;
192+
auto inv_rho = temporaries.inv_rho;
193+
auto ze_ice = temporaries.ze_ice;
194+
auto ze_rain = temporaries.ze_rain;
195+
auto prec = temporaries.prec;
196+
auto rho = temporaries.rho;
197+
auto rhofacr = temporaries.rhofacr;
198+
auto rhofaci = temporaries.rhofaci;
199+
auto acn = temporaries.acn;
200+
auto qv_sat_l = temporaries.qv_sat_l;
201+
auto qv_sat_i = temporaries.qv_sat_i;
202+
auto sup = temporaries.sup;
203+
auto qv_supersat_i = temporaries.qv_supersat_i;
204+
auto tmparr2 = temporaries.tmparr2;
205+
auto exner = temporaries.exner;
206+
auto diag_equiv_reflectivity = temporaries.diag_equiv_reflectivity;
207+
auto diag_vm_qi = temporaries.diag_vm_qi;
208+
auto diag_diam_qi = temporaries.diag_diam_qi;
209+
auto pratot = temporaries.pratot;
210+
auto prctot = temporaries.prctot;
211+
auto qtend_ignore = temporaries.qtend_ignore;
212+
auto ntend_ignore = temporaries.ntend_ignore;
213+
auto mu_c = temporaries.mu_c;
214+
auto lamc = temporaries.lamc;
215+
auto qr_evap_tend = temporaries.qr_evap_tend;
216+
auto v_qc = temporaries.v_qc;
217+
auto v_nc = temporaries.v_nc;
218+
auto flux_qx = temporaries.flux_qx;
219+
auto flux_nx = temporaries.flux_nx;
220+
auto v_qit = temporaries.v_qit;
221+
auto v_nit = temporaries.v_nit;
222+
auto flux_nit = temporaries.flux_nit;
223+
auto flux_bir = temporaries.flux_bir;
224+
auto flux_qir = temporaries.flux_qir;
225+
auto flux_qit = temporaries.flux_qit;
226+
auto v_qr = temporaries.v_qr;
227+
auto v_nr = temporaries.v_nr;
215228

216229
// we do not want to measure init stuff
217230
auto start = std::chrono::steady_clock::now();
@@ -242,7 +255,7 @@ ::p3_main_internal_disp(
242255

243256
p3_main_part2_disp(
244257
nj, nk, runtime_options.max_total_ni, infrastructure.predictNc, infrastructure.prescribedCCN, infrastructure.dt, inv_dt,
245-
lookup_tables.dnu_table_vals, lookup_tables.ice_table_vals, lookup_tables.collect_table_vals,
258+
lookup_tables.dnu_table_vals, lookup_tables.ice_table_vals, lookup_tables.collect_table_vals,
246259
lookup_tables.revap_table_vals, pres, dpres, dz, nc_nuceat_tend, inv_exner,
247260
exner, inv_cld_frac_l, inv_cld_frac_i, inv_cld_frac_r, ni_activated, inv_qc_relvar, cld_frac_i,
248261
cld_frac_l, cld_frac_r, qv_prev, t_prev, T_atm, rho, inv_rho, qv_sat_l, qv_sat_i, qv_supersat_i, rhofacr, rhofaci, acn,

0 commit comments

Comments
 (0)