From 663216f0f39d072df86d2ed13a6630c094138958 Mon Sep 17 00:00:00 2001 From: "Andrew M. Bradley" Date: Wed, 23 Oct 2024 16:51:56 -0400 Subject: [PATCH 1/2] EAMxx/SHOC: Fix a subtle GPU bug. Fix either a long-latent issue or a compiler-side issue that was recently triggered by an unrelated PR. Add a test for 128 levels to hold us over until the default for all tests is changed to 128 levels. Add an option to global state hasher to hash a user-provided array. This let me hash temporary workspace as part of isolating the issue. --- cime_config/tests.py | 1 + .../shoc/eamxx_shoc_process_interface.hpp | 3 ++- .../share/atm_process/atmosphere_process.hpp | 3 ++- .../atm_process/atmosphere_process_hash.cpp | 21 +++++++++++++++---- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/cime_config/tests.py b/cime_config/tests.py index bce0c4fcc26..e9bca7bb250 100644 --- a/cime_config/tests.py +++ b/cime_config/tests.py @@ -705,6 +705,7 @@ "PEM_Ln90.ne30pg2_ne30pg2.F2010-SCREAMv1.scream-spa_remap--scream-output-preset-4", "ERS_Ln90.ne30pg2_ne30pg2.F2010-SCREAMv1.scream-small_kernels--scream-output-preset-5", "ERP_Ln22.conusx4v1pg2_r05_oECv3.F2010-SCREAMv1-noAero.scream-bfbhash--scream-output-preset-6", + "ERS_Ld1.ne30pg2_ne30pg2.F2010-SCREAMv1.scream-L128--scream-output-preset-4" ) }, diff --git a/components/eamxx/src/physics/shoc/eamxx_shoc_process_interface.hpp b/components/eamxx/src/physics/shoc/eamxx_shoc_process_interface.hpp index 4d76efd4558..de38a7cac7f 100644 --- a/components/eamxx/src/physics/shoc/eamxx_shoc_process_interface.hpp +++ b/components/eamxx/src/physics/shoc/eamxx_shoc_process_interface.hpp @@ -138,8 +138,9 @@ class SHOCMacrophysics : public scream::AtmosphereProcess // Dry static energy shoc_s(i,k) = PF::calculate_dse(T_mid(i,k),z_mid(i,k),phis(i)); + + if (k+1 == nlev_packs) zi_grid(i,nlevi_v)[nlevi_p] = 0; }); - zi_grid(i,nlevi_v)[nlevi_p] = 0; team.team_barrier(); const auto zt_grid_s = ekat::subview(zt_grid, i); diff --git a/components/eamxx/src/share/atm_process/atmosphere_process.hpp b/components/eamxx/src/share/atm_process/atmosphere_process.hpp index 3150f56c9f9..7face2d5aa9 100644 --- a/components/eamxx/src/share/atm_process/atmosphere_process.hpp +++ b/components/eamxx/src/share/atm_process/atmosphere_process.hpp @@ -274,7 +274,8 @@ class AtmosphereProcess : public ekat::enable_shared_from_this& fgs, HashType& accum) { } // namespace anon +// (mem, nmem) describe an arbitrary device array. If non-0, the array will be +// hashed and reported as a fourth line. void AtmosphereProcess ::print_global_state_hash (const std::string& label, const bool in, const bool out, - const bool internal) const { - static constexpr int nslot = 3; + const bool internal, const Real* mem, const int nmem) const { + static constexpr int nslot = 4; HashType laccum[nslot] = {0}; hash(m_fields_in, laccum[0]); hash(m_groups_in, laccum[0]); hash(m_fields_out, laccum[1]); hash(m_groups_out, laccum[1]); hash(m_internal_fields, laccum[2]); + const bool hash_array = mem != nullptr; + if (hash_array) { + HashType accum = 0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, nmem), + KOKKOS_LAMBDA(const int i, HashType& accum) { bfbhash::hash(mem[i], accum); }, + bfbhash::HashReducer<>(accum)); + Kokkos::fence(); + laccum[3] = accum; + } HashType gaccum[nslot]; - bfbhash::all_reduce_HashType(m_comm.mpi_comm(), laccum, gaccum, nslot); - const bool show[] = {in, out, internal}; + const int nr = hash_array ? nslot : nslot-1; + bfbhash::all_reduce_HashType(m_comm.mpi_comm(), laccum, gaccum, nr); + const bool show[] = {in, out, internal, hash_array}; if (m_comm.am_i_root()) for (int i = 0; i < nslot; ++i) if (show[i]) From d57208cb97e35033e8cecf3d5509a8499f9091be Mon Sep 17 00:00:00 2001 From: "Andrew M. Bradley" Date: Wed, 23 Oct 2024 22:15:11 -0400 Subject: [PATCH 2/2] Fix ERS run length. --- cime_config/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/tests.py b/cime_config/tests.py index e9bca7bb250..47418ae08f1 100644 --- a/cime_config/tests.py +++ b/cime_config/tests.py @@ -705,7 +705,7 @@ "PEM_Ln90.ne30pg2_ne30pg2.F2010-SCREAMv1.scream-spa_remap--scream-output-preset-4", "ERS_Ln90.ne30pg2_ne30pg2.F2010-SCREAMv1.scream-small_kernels--scream-output-preset-5", "ERP_Ln22.conusx4v1pg2_r05_oECv3.F2010-SCREAMv1-noAero.scream-bfbhash--scream-output-preset-6", - "ERS_Ld1.ne30pg2_ne30pg2.F2010-SCREAMv1.scream-L128--scream-output-preset-4" + "ERS_Ln22.ne30pg2_ne30pg2.F2010-SCREAMv1.scream-L128--scream-output-preset-4" ) },