From ccf65fb6d3650baf24444451c173d006665d7f48 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Mon, 12 May 2025 09:32:54 -0600 Subject: [PATCH 1/4] Remove YAKL backend support for eamxx/rrtmgp [BFB] --- components/eam/src/physics/rrtmgp/external | 2 +- components/eamxx/CMakeLists.txt | 21 +- .../eamxx/kokkos_rrtmgp/shell_commands | 3 - .../eamxx/yakl_rrtmgp/shell_commands | 2 - .../eamxx/src/physics/rrtmgp/CMakeLists.txt | 170 +-- .../physics/rrtmgp/eamxx_rrtmgp_interface.cpp | 1107 ----------------- .../physics/rrtmgp/eamxx_rrtmgp_interface.hpp | 129 -- .../rrtmgp/eamxx_rrtmgp_process_interface.cpp | 629 +--------- .../rrtmgp/eamxx_rrtmgp_process_interface.hpp | 106 -- .../src/physics/rrtmgp/rrtmgp_test_utils.cpp | 121 -- .../src/physics/rrtmgp/rrtmgp_test_utils.hpp | 32 - .../eamxx/src/physics/rrtmgp/rrtmgp_utils.hpp | 64 - .../rrtmgp/tests/generate_baseline.cpp | 97 -- .../src/physics/rrtmgp/tests/rrtmgp_tests.cpp | 300 +---- .../rrtmgp/tests/rrtmgp_unit_tests.cpp | 847 +------------ .../homme_shoc_cld_spa_p3_rrtmgp_pg2_dp.cpp | 2 +- .../single-process/rrtmgp/CMakeLists.txt | 9 +- .../rrtmgp/rrtmgp_standalone_unit.cpp | 393 +----- 18 files changed, 36 insertions(+), 3998 deletions(-) delete mode 100644 components/eamxx/cime_config/testdefs/testmods_dirs/eamxx/kokkos_rrtmgp/shell_commands delete mode 100644 components/eamxx/cime_config/testdefs/testmods_dirs/eamxx/yakl_rrtmgp/shell_commands diff --git a/components/eam/src/physics/rrtmgp/external b/components/eam/src/physics/rrtmgp/external index a4f340db2d54..7a96f68db5b4 160000 --- a/components/eam/src/physics/rrtmgp/external +++ b/components/eam/src/physics/rrtmgp/external @@ -1 +1 @@ -Subproject commit a4f340db2d545385a57c19e9c4d3ec0c399dc7fb +Subproject commit 7a96f68db5b4cba11e229968f8c17f0d74913db9 diff --git a/components/eamxx/CMakeLists.txt b/components/eamxx/CMakeLists.txt index 2ded53df65ed..388f7ba6d690 100644 --- a/components/eamxx/CMakeLists.txt +++ b/components/eamxx/CMakeLists.txt @@ -68,7 +68,7 @@ else() endif() #################################################################### -# Kokkos/YAKL-related settings # +# Kokkos related settings # #################################################################### if (Kokkos_ENABLE_CUDA) @@ -95,16 +95,12 @@ endif() option (Kokkos_ENABLE_SERIAL "" ON) set (EAMXX_ENABLE_GPU FALSE CACHE BOOL "") -set (CUDA_BUILD FALSE CACHE BOOL "") #needed for yakl if kokkos vars are not visible there? -set (HIP_BUILD FALSE CACHE BOOL "") #needed for yakl if kokkos vars are not visible there? -set (SYCL_BUILD FALSE CACHE BOOL "") #needed for yakl if kokkos vars are not visible there? # Determine if this is a Cuda build. if (Kokkos_ENABLE_CUDA) # Add CUDA as a language for CUDA builds enable_language(CUDA) set (EAMXX_ENABLE_GPU TRUE CACHE BOOL "" FORCE) - set (CUDA_BUILD TRUE CACHE BOOL "" FORCE) #needed for yakl if kokkos vars are not visible there? endif () # Determine if this is a HIP build. @@ -112,14 +108,12 @@ if (Kokkos_ENABLE_HIP) # Add CUDA as a language for CUDA builds enable_language(HIP) set (EAMXX_ENABLE_GPU TRUE CACHE BOOL "" FORCE) - set (HIP_BUILD TRUE CACHE BOOL "" FORCE) #needed for yakl if kokkos vars are not visible there? endif () # Determine if this is a sycl build. if (Kokkos_ENABLE_SYCL) #enable_language(SYCL) set (EAMXX_ENABLE_GPU TRUE CACHE BOOL "" FORCE) - set (SYCL_BUILD TRUE CACHE BOOL "" FORCE) #needed for yakl if kokkos vars are not visible there? endif () if( NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "[Cc]lang" ) @@ -226,17 +220,6 @@ endif() # #cmakedefine RRTMGP_EXPENSIVE_CHECKS option (SCREAM_RRTMGP_DEBUG "Turn on extra debug checks in RRTMGP" ${SCREAM_DEBUG}) -option(SCREAM_RRTMGP_ENABLE_YAKL "Use YAKL under rrtmgp" FALSE) -option(SCREAM_RRTMGP_ENABLE_KOKKOS "Use Kokkos under rrtmgp" TRUE) -if (SCREAM_RRTMGP_ENABLE_YAKL) - add_definitions("-DRRTMGP_ENABLE_YAKL") -endif() - -if (SCREAM_RRTMGP_ENABLE_KOKKOS) - add_definitions("-DRRTMGP_ENABLE_KOKKOS") -endif() - - set(SCREAM_DOUBLE_PRECISION TRUE CACHE BOOL "Set to double precision (default True)") # For now, only used in share/grid/remap/refining_remapper_rma.*pp @@ -623,8 +606,6 @@ function (print_var var) endfunction () print_var(EAMXX_ENABLE_GPU) -print_var(CUDA_BUILD) -print_var(HIP_BUILD) print_var(SCREAM_MACHINE) print_var(SCREAM_DYNAMICS_DYCORE) print_var(SCREAM_DOUBLE_PRECISION) diff --git a/components/eamxx/cime_config/testdefs/testmods_dirs/eamxx/kokkos_rrtmgp/shell_commands b/components/eamxx/cime_config/testdefs/testmods_dirs/eamxx/kokkos_rrtmgp/shell_commands deleted file mode 100644 index 817dcf42fef5..000000000000 --- a/components/eamxx/cime_config/testdefs/testmods_dirs/eamxx/kokkos_rrtmgp/shell_commands +++ /dev/null @@ -1,3 +0,0 @@ -./xmlchange --append SCREAM_CMAKE_OPTIONS='SCREAM_RRTMGP_ENABLE_YAKL Off' -./xmlchange --append SCREAM_CMAKE_OPTIONS='SCREAM_RRTMGP_ENABLE_KOKKOS On' - diff --git a/components/eamxx/cime_config/testdefs/testmods_dirs/eamxx/yakl_rrtmgp/shell_commands b/components/eamxx/cime_config/testdefs/testmods_dirs/eamxx/yakl_rrtmgp/shell_commands deleted file mode 100644 index 61d571c95974..000000000000 --- a/components/eamxx/cime_config/testdefs/testmods_dirs/eamxx/yakl_rrtmgp/shell_commands +++ /dev/null @@ -1,2 +0,0 @@ -./xmlchange --append SCREAM_CMAKE_OPTIONS='SCREAM_RRTMGP_ENABLE_YAKL On' -./xmlchange --append SCREAM_CMAKE_OPTIONS='SCREAM_RRTMGP_ENABLE_KOKKOS Off' diff --git a/components/eamxx/src/physics/rrtmgp/CMakeLists.txt b/components/eamxx/src/physics/rrtmgp/CMakeLists.txt index 2387b3af8e5b..cf134dd4a2d7 100644 --- a/components/eamxx/src/physics/rrtmgp/CMakeLists.txt +++ b/components/eamxx/src/physics/rrtmgp/CMakeLists.txt @@ -2,110 +2,6 @@ include(EkatUtils) include(EkatSetCompilerFlags) include(ScreamUtils) -# Copied from EKAT, YAKL is an interface target so requires special -# handling. Get rid of this once RRTMGP is using kokkos. -macro (SetCudaFlagsYakl targetName) - if (Kokkos_ENABLE_CUDA) - # We must find CUDA - find_package(CUDA REQUIRED) - - # Still check if CUDA_FOUND is true, since we don't know if the particular - # FindCUDA.cmake module being used is checking _FIND_REQUIRED - if (NOT CUDA_FOUND) - message (FATAL_ERROR "Error! Unable to find CUDA.") - endif() - - set(options CUDA_LANG) - set(args1v) - set(argsMv FLAGS) - cmake_parse_arguments(SCF "${options}" "${args1v}" "${argsMv}" ${ARGN}) - - if (SCF_FLAGS) - set (FLAGS ${SCF_FLAGS}) - else () - # We need host-device lambdas - set (FLAGS --expt-extended-lambda) - - IsDebugBuild (SCF_DEBUG) - if (SCF_DEBUG) - # Turn off fused multiply add for debug so we can stay BFB with host - list (APPEND FLAGS --fmad=false) - endif() - endif() - - # Set the flags on the target - if (SCF_CUDA_LANG) - # User is setting the src files language to CUDA - target_compile_options (${targetName} INTERFACE - "$<$:${FLAGS}>") - else() - # We assume the user is setting the src files lang to CXX - target_compile_options (${targetName} INTERFACE - "$<$:${FLAGS}>") - endif() - endif() -endmacro() - -################################## -# YAKL # -################################## - -# RRTMGP++ requires YAKL -if (SCREAM_RRTMGP_ENABLE_YAKL) - string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_ci) - if (TARGET yakl) - # Other E3SM components are building YAKL... - message ("It appears some other part of E3SM is building YAKL.\n" - "We will reuse that, but if this is a debug build we will\n" - "add the --fmad=false flag to the cuda flags used by YAKL\n") - else () - # Prepare CUDA/HIP flags for YAKL - if (CUDA_BUILD) - string(REPLACE ";" " " KOKKOS_CUDA_OPTIONS_STR "${KOKKOS_CUDA_OPTIONS}") - set(YAKL_ARCH "CUDA") - set(YAKL_CUDA_FLAGS "-DYAKL_ARCH_CUDA ${KOKKOS_CUDA_OPTIONS_STR} --expt-relaxed-constexpr -ccbin ${CMAKE_CXX_COMPILER}") - string (REPLACE " " ";" YAKL_CUDA_FLAGS_LIST ${YAKL_CUDA_FLAGS}) - endif() - if (HIP_BUILD) - set(YAKL_ARCH "HIP") - set(YAKL_HIP_FLAGS "-DYAKL_ARCH_HIP -O3 -D__HIP_ROCclr__ -D__HIP_ARCH_GFX90A__=1 --rocm-path=${ROCM_PATH} --offload-arch=gfx90a -x hip") - string (REPLACE " " ";" YAKL_HIP_FLAGS_LIST ${YAKL_HIP_FLAGS}) - endif() - if (SYCL_BUILD) - set(YAKL_ARCH "SYCL") - set(YAKL_SYCL_FLAGS " -fp-model precise -DYAKL_ARCH_SYCL -\-intel -fsycl -fsycl-targets=spir64_gen -mlong-double-64") - string (REPLACE " " ";" YAKL_SYCL_FLAGS_LIST ${YAKL_SYCL_FLAGS}) - endif() - - set (YAKL_SOURCE_DIR ${SCREAM_BASE_DIR}/../../externals/YAKL) - add_subdirectory(${YAKL_SOURCE_DIR} ${CMAKE_BINARY_DIR}/externals/YAKL) - - # Set some additional flag/cpp option on the yakl target - - cmake_policy (SET CMP0079 NEW) # Allow to link to a tgt from a different directory - - # EAMxx *requires* MPI, so simply look for it, then link against it - find_package(MPI REQUIRED COMPONENTS C) - target_link_libraries (yakl INTERFACE MPI::MPI_C) - - # For debug builds, set -DYAKL_DEBUG - if (CMAKE_BUILD_TYPE_ci STREQUAL "debug") - target_compile_definitions(yakl INTERFACE YAKL_DEBUG) - endif() - endif() - - # See eamxx/src/dynamics/homme/CMakeLists.txt for an explanation of this - # workaround. - if ((SCREAM_MACHINE STREQUAL "ascent" OR SCREAM_MACHINE STREQUAL "pm-gpu") AND CMAKE_BUILD_TYPE_ci STREQUAL "debug") - SetCudaFlagsYakl(yakl CUDA_LANG FLAGS -UNDEBUG) - else() - SetCudaFlagsYakl(yakl CUDA_LANG) - endif() - - list(APPEND CMAKE_MODULE_PATH ${YAKL_SOURCE_DIR}) - include (yakl_utils) -endif() - ################################## # RRTMGP # ################################## @@ -113,42 +9,14 @@ endif() set(EAM_RRTMGP_DIR ${SCREAM_BASE_DIR}/../eam/src/physics/rrtmgp) # Build RRTMGP library; this builds the core RRTMGP external source as a library named "rrtmgp" # NOTE: The external RRTMGP build needs some fixes to work with CUDA in a library build, so for now we will build these ourselves -set(EXTERNAL_SRC - ${EAM_RRTMGP_DIR}/external/cpp/rrtmgp/kernels/mo_gas_optics_kernels.cpp - ${EAM_RRTMGP_DIR}/external/cpp/rrtmgp/mo_rrtmgp_util_reorder.cpp - ${EAM_RRTMGP_DIR}/external/cpp/rte/expand_and_transpose.cpp - ${EAM_RRTMGP_DIR}/external/cpp/rte/kernels/mo_fluxes_broadband_kernels.cpp - ${EAM_RRTMGP_DIR}/external/cpp/rte/kernels/mo_optical_props_kernels.cpp - ${EAM_RRTMGP_DIR}/external/cpp/rte/kernels/mo_rte_solver_kernels.cpp - ${EAM_RRTMGP_DIR}/external/cpp/extensions/fluxes_byband/mo_fluxes_byband_kernels.cpp - ${EAM_RRTMGP_DIR}/external/cpp/examples/all-sky/mo_garand_atmos_io.cpp - ${EAM_RRTMGP_DIR}/external/cpp/examples/all-sky/mo_load_cloud_coefficients.cpp - ${EAM_RRTMGP_DIR}/external/cpp/examples/mo_load_coefficients.cpp -) -add_library(rrtmgp ${EXTERNAL_SRC}) -target_compile_definitions(rrtmgp PUBLIC EAMXX_HAS_RRTMGP) -EkatDisableAllWarning(rrtmgp) -if (SCREAM_RRTMGP_ENABLE_YAKL) - yakl_process_target(rrtmgp) -else() - if (CUDA_BUILD) - target_compile_options(rrtmgp PUBLIC $<$:--expt-relaxed-constexpr>) - endif() -endif() +add_library(rrtmgp INTERFACE) +target_compile_definitions(rrtmgp INTERFACE EAMXX_HAS_RRTMGP) -# NOTE: cannot use 'PUBLIC' in target_link_libraries, -# since yakl_process_target already used it -# with the "plain" signature if (NOT TARGET Kokkos::kokkos) find_package(Kokkos REQUIRED) endif () -if (SCREAM_RRTMGP_ENABLE_YAKL) - target_link_libraries(rrtmgp yakl Kokkos::kokkos) -else() - target_link_libraries(rrtmgp Kokkos::kokkos) -endif() -target_include_directories(rrtmgp PUBLIC - ${SCREAM_BASE_DIR}/../../externals/YAKL +target_link_libraries(rrtmgp INTERFACE Kokkos::kokkos) +target_include_directories(rrtmgp INTERFACE ${EAM_RRTMGP_DIR}/external/cpp ${EAM_RRTMGP_DIR}/external/cpp/extensions/cloud_optics ${EAM_RRTMGP_DIR}/external/cpp/examples @@ -166,29 +34,14 @@ target_include_directories(rrtmgp PUBLIC # separates out the code that comprises the core RRTMGP library from the extensions # and examples that we have modified for use in SCREAM specifically. -# However, due to the mix of YAKL and Kokkos, we split the target in two: -# - scream_rrtmgp: kokkos-based interface to EAMxx -# - scream_rrtmgp_yakl: source codes to be built with YAKL flags/options - -################################## -# SCREAM_RRTMGP_YAKL # -################################## - set(SCREAM_RRTMGP_SOURCES_INTERFACE eamxx_rrtmgp_interface.cpp ) add_library(eamxx_rrtmgp_interface ${SCREAM_RRTMGP_SOURCES_INTERFACE}) -if (SCREAM_RRTMGP_ENABLE_YAKL) - yakl_process_target(eamxx_rrtmgp_interface) -endif() - -# NOTE: cannot use 'PUBLIC' in target_link_libraries, -# since yakl_process_target already used it -# with the "plain" signature find_library(NETCDF_C netcdf HINTS ${NetCDF_C_PATH} PATH_SUFFIXES lib lib64) -target_link_libraries(eamxx_rrtmgp_interface ${NETCDF_C} rrtmgp scream_share Kokkos::kokkos) +target_link_libraries(eamxx_rrtmgp_interface PUBLIC ${NETCDF_C} rrtmgp scream_share Kokkos::kokkos) target_include_directories(eamxx_rrtmgp_interface PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(eamxx_rrtmgp_interface SYSTEM PUBLIC @@ -213,19 +66,6 @@ target_include_directories(scream_rrtmgp PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/modules) -# If yakl builds with LANG!=CXX, then the yakl CPP defines don't transfer to scream -# targets, b/c of the lang difference. So, if YAKL_ARCH is set, we add -# ${YAKL_${YAKL_ARCH}_FLAGS} flags to the CXX flags of scream_rrtmgp. -# In particular, this will ensure that all the yakl macros -# are correctly defined in YAKL headers, depending on the backend -if (SCREAM_RRTMGP_ENABLE_YAKL) - if (YAKL_ARCH) - target_compile_options(scream_rrtmgp PUBLIC - "$<$:${YAKL_${YAKL_ARCH}_FLAGS_LIST}>") - endif() -endif() - - # Ensure RRTMGP lookup tables are present in the data dir set (RRTMGP_TABLES scream/init/rrtmgp-data-sw-g112-210809.nc diff --git a/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_interface.cpp b/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_interface.cpp index e59d11dc2f83..ddd9093a5d60 100644 --- a/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_interface.cpp +++ b/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_interface.cpp @@ -5,1120 +5,13 @@ namespace scream { void init_kls () { -#ifdef RRTMGP_ENABLE_YAKL - // Initialize yakl - if(!yakl::isInitialized()) { yakl::init(); } -#endif -#ifdef RRTMGP_ENABLE_KOKKOS // Initialize kokkos if(!Kokkos::is_initialized()) { Kokkos::initialize(); } -#endif } void finalize_kls() { -#ifdef RRTMGP_ENABLE_YAKL - // Finalize YAKL - yakl::finalize(); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS //Kokkos::finalize(); We do the kokkos finalization elsewhere -#endif } -#ifdef RRTMGP_ENABLE_YAKL -namespace rrtmgp { - -using yakl::fortran::parallel_for; -using yakl::fortran::SimpleBounds; -using yakl::intrinsics::merge; -/* - * Objects containing k-distribution information need to be initialized - * once and then persist throughout the life of the program, so we - * declare them here within the rrtmgp namespace. - */ -GasOpticsRRTMGP k_dist_sw; -GasOpticsRRTMGP k_dist_lw; - -/* - * Objects containing cloud optical property look-up table information. - * We want to initialize these once and use throughout the life of the - * program, so declare here and read data in during rrtmgp_initialize(). - */ -CloudOptics cloud_optics_sw; -CloudOptics cloud_optics_lw; - -bool initialized = false; -bool initialized_k = false; - -// local functions -namespace { - -OpticalProps2str get_cloud_optics_sw( - const int ncol, const int nlay, - CloudOptics &cloud_optics, GasOpticsRRTMGP &kdist, - real2d &lwp, real2d &iwp, real2d &rel, real2d &rei) { - - // Initialize optics - OpticalProps2str clouds; - clouds.init(kdist.get_band_lims_wavenumber()); - clouds.alloc_2str(ncol, nlay); - - // Needed for consistency with all-sky example problem? - cloud_optics.set_ice_roughness(2); - - // Limit effective radii to be within bounds of lookup table - auto rel_limited = real2d("rel_limited", ncol, nlay); - auto rei_limited = real2d("rei_limited", ncol, nlay); - limit_to_bounds(rel, cloud_optics.radliq_lwr, cloud_optics.radliq_upr, rel_limited); - limit_to_bounds(rei, cloud_optics.radice_lwr, cloud_optics.radice_upr, rei_limited); - - // Calculate cloud optics - cloud_optics.cloud_optics(ncol, nlay, lwp, iwp, rel_limited, rei_limited, clouds); - - // Return optics - return clouds; -} - -OpticalProps1scl get_cloud_optics_lw( - const int ncol, const int nlay, - CloudOptics &cloud_optics, GasOpticsRRTMGP &kdist, - real2d &lwp, real2d &iwp, real2d &rel, real2d &rei) { - - // Initialize optics - OpticalProps1scl clouds; - clouds.init(kdist.get_band_lims_wavenumber()); - clouds.alloc_1scl(ncol, nlay); // this is dumb, why do we need to init and alloc separately?! - - // Needed for consistency with all-sky example problem? - cloud_optics.set_ice_roughness(2); - - // Limit effective radii to be within bounds of lookup table - auto rel_limited = real2d("rel_limited", ncol, nlay); - auto rei_limited = real2d("rei_limited", ncol, nlay); - limit_to_bounds(rel, cloud_optics.radliq_lwr, cloud_optics.radliq_upr, rel_limited); - limit_to_bounds(rei, cloud_optics.radice_lwr, cloud_optics.radice_upr, rei_limited); - - // Calculate cloud optics - cloud_optics.cloud_optics(ncol, nlay, lwp, iwp, rel_limited, rei_limited, clouds); - - // Return optics - return clouds; -} - -OpticalProps2str get_subsampled_clouds( - const int ncol, const int nlay, const int nbnd, const int ngpt, - OpticalProps2str &cloud_optics, GasOpticsRRTMGP &kdist, real2d &cld, real2d &p_lay) { - // Initialized subsampled optics - OpticalProps2str subsampled_optics; - subsampled_optics.init(kdist.get_band_lims_wavenumber(), kdist.get_band_lims_gpoint(), "subsampled_optics"); - subsampled_optics.alloc_2str(ncol, nlay); - // Check that we do not have clouds with no optical properties; this would get corrected - // when we assign optical props, but we want to use a "radiative cloud fraction" - // for the subcolumn sampling too because otherwise we can get vertically-contiguous cloud - // mask profiles with no actual cloud properties in between, which would just further overestimate - // the vertical correlation of cloudy layers. I.e., cloudy layers might look maximally overlapped - // even when separated by layers with no cloud properties, when in fact those layers should be - // randomly overlapped. - auto cldfrac_rad = real2d("cldfrac_rad", ncol, nlay); - memset(cldfrac_rad, 0.0); // Start with all zeros - TIMED_KERNEL(parallel_for(SimpleBounds<3>(nbnd,nlay,ncol), YAKL_LAMBDA (int ibnd, int ilay, int icol) { - if (cloud_optics.tau(icol,ilay,ibnd) > 0) { - cldfrac_rad(icol,ilay) = cld(icol,ilay); - } - })); - // Get subcolumn cloud mask; note that get_subcolumn_mask exposes overlap assumption as an option, - // but the only currently supported options are 0 (trivial all-or-nothing cloud) or 1 (max-rand), - // so overlap has not been exposed as an option beyond this subcolumn. In the future, we should - // support generalized overlap as well, with parameters derived from DPSCREAM simulations with very - // high resolution. - int overlap = 1; - // Get unique seeds for each column that are reproducible across different MPI rank layouts; - // use decimal part of pressure for this, consistent with the implementation in EAM - auto seeds = int1d("seeds", ncol); - TIMED_KERNEL(parallel_for(SimpleBounds<1>(ncol), YAKL_LAMBDA(int icol) { - seeds(icol) = 1e9 * (p_lay(icol,nlay) - int(p_lay(icol,nlay))); - })); - auto cldmask = get_subcolumn_mask(ncol, nlay, ngpt, cldfrac_rad, overlap, seeds); - - // Assign optical properties to subcolumns (note this implements MCICA) - auto gpoint_bands = kdist.get_gpoint_bands(); - TIMED_KERNEL(parallel_for(SimpleBounds<3>(ngpt,nlay,ncol), YAKL_LAMBDA(int igpt, int ilay, int icol) { - auto ibnd = gpoint_bands(igpt); - if (cldmask(icol,ilay,igpt) == 1) { - subsampled_optics.tau(icol,ilay,igpt) = cloud_optics.tau(icol,ilay,ibnd); - subsampled_optics.ssa(icol,ilay,igpt) = cloud_optics.ssa(icol,ilay,ibnd); - subsampled_optics.g (icol,ilay,igpt) = cloud_optics.g (icol,ilay,ibnd); - } else { - subsampled_optics.tau(icol,ilay,igpt) = 0; - subsampled_optics.ssa(icol,ilay,igpt) = 0; - subsampled_optics.g (icol,ilay,igpt) = 0; - } - })); - return subsampled_optics; -} - -OpticalProps1scl get_subsampled_clouds( - const int ncol, const int nlay, const int nbnd, const int ngpt, - OpticalProps1scl &cloud_optics, GasOpticsRRTMGP &kdist, real2d &cld, real2d &p_lay) { - // Initialized subsampled optics - OpticalProps1scl subsampled_optics; - subsampled_optics.init(kdist.get_band_lims_wavenumber(), kdist.get_band_lims_gpoint(), "subsampled_optics"); - subsampled_optics.alloc_1scl(ncol, nlay); - // Check that we do not have clouds with no optical properties; this would get corrected - // when we assign optical props, but we want to use a "radiative cloud fraction" - // for the subcolumn sampling too because otherwise we can get vertically-contiguous cloud - // mask profiles with no actual cloud properties in between, which would just further overestimate - // the vertical correlation of cloudy layers. I.e., cloudy layers might look maximally overlapped - // even when separated by layers with no cloud properties, when in fact those layers should be - // randomly overlapped. - auto cldfrac_rad = real2d("cldfrac_rad", ncol, nlay); - memset(cldfrac_rad, 0.0); // Start with all zeros - TIMED_KERNEL(parallel_for(SimpleBounds<3>(nbnd,nlay,ncol), YAKL_LAMBDA (int ibnd, int ilay, int icol) { - if (cloud_optics.tau(icol,ilay,ibnd) > 0) { - cldfrac_rad(icol,ilay) = cld(icol,ilay); - } - })); - // Get subcolumn cloud mask - int overlap = 1; - // Get unique seeds for each column that are reproducible across different MPI rank layouts; - // use decimal part of pressure for this, consistent with the implementation in EAM; use different - // seed values for longwave and shortwave - auto seeds = int1d("seeds", ncol); - TIMED_KERNEL(parallel_for(SimpleBounds<1>(ncol), YAKL_LAMBDA(int icol) { - seeds(icol) = 1e9 * (p_lay(icol,nlay-1) - int(p_lay(icol,nlay-1))); - })); - auto cldmask = get_subcolumn_mask(ncol, nlay, ngpt, cldfrac_rad, overlap, seeds); - // Assign optical properties to subcolumns (note this implements MCICA) - auto gpoint_bands = kdist.get_gpoint_bands(); - TIMED_KERNEL(parallel_for(SimpleBounds<3>(ngpt,nlay,ncol), YAKL_LAMBDA(int igpt, int ilay, int icol) { - auto ibnd = gpoint_bands(igpt); - if (cldmask(icol,ilay,igpt) == 1) { - subsampled_optics.tau(icol,ilay,igpt) = cloud_optics.tau(icol,ilay,ibnd); - } else { - subsampled_optics.tau(icol,ilay,igpt) = 0; - } - })); - return subsampled_optics; -} - -} - -/* - * The following routines provide a simple interface to RRTMGP. These - * can be used as-is, but are intended to be wrapped by the SCREAM AD - * interface to radiation. - */ -void rrtmgp_initialize(GasConcs &gas_concs, - const std::string& coefficients_file_sw, const std::string& coefficients_file_lw, - const std::string& cloud_optics_file_sw, const std::string& cloud_optics_file_lw, - const std::shared_ptr& logger) { - - // If we've already initialized, just exit - if (initialized) { - if (logger) - logger->info("RRTMGP is already initialized; skipping\n"); - return; - } - - // Initialize YAKL - if (!yakl::isInitialized()) { yakl::init(); } - - // Load and initialize absorption coefficient data - load_and_init(k_dist_sw, coefficients_file_sw, gas_concs); - load_and_init(k_dist_lw, coefficients_file_lw, gas_concs); - - // Load and initialize cloud optical property look-up table information - load_cld_lutcoeff(cloud_optics_sw, cloud_optics_file_sw); - load_cld_lutcoeff(cloud_optics_lw, cloud_optics_file_lw); - - // We are now initialized! - initialized = true; -} - -void rrtmgp_finalize() { - initialized = false; - k_dist_sw.finalize(); - k_dist_lw.finalize(); - cloud_optics_sw.finalize(); //~CloudOptics(); - cloud_optics_lw.finalize(); //~CloudOptics(); -} - -void compute_band_by_band_surface_albedos( - const int ncol, const int nswbands, - real1d &sfc_alb_dir_vis, real1d &sfc_alb_dir_nir, - real1d &sfc_alb_dif_vis, real1d &sfc_alb_dif_nir, - real2d &sfc_alb_dir, real2d &sfc_alb_dif) { - - EKAT_ASSERT_MSG(initialized, "Error! rrtmgp_initialize must be called before GasOpticsRRTMGP object can be used."); - auto wavenumber_limits = k_dist_sw.get_band_lims_wavenumber(); - - EKAT_ASSERT_MSG(yakl::intrinsics::size(wavenumber_limits, 1) == 2, - "Error! 1st dimension for wavenumber_limits should be 2."); - EKAT_ASSERT_MSG(yakl::intrinsics::size(wavenumber_limits, 2) == nswbands, - "Error! 2nd dimension for wavenumber_limits should be " + std::to_string(nswbands) + " (nswbands)."); - - // Loop over bands, and determine for each band whether it is broadly in the - // visible or infrared part of the spectrum (visible or "not visible") - TIMED_KERNEL(parallel_for(SimpleBounds<2>(nswbands, ncol), YAKL_LAMBDA(const int ibnd, const int icol) { - - // Threshold between visible and infrared is 0.7 micron, or 14286 cm^-1. - const real visible_wavenumber_threshold = 14286; - - // Wavenumber is in the visible if it is above the visible wavenumber - // threshold, and in the infrared if it is below the threshold - const bool is_visible_wave1 = (wavenumber_limits(1, ibnd) > visible_wavenumber_threshold ? true : false); - const bool is_visible_wave2 = (wavenumber_limits(2, ibnd) > visible_wavenumber_threshold ? true : false); - - if (is_visible_wave1 && is_visible_wave2) { - - // Entire band is in the visible - sfc_alb_dir(icol,ibnd) = sfc_alb_dir_vis(icol); - sfc_alb_dif(icol,ibnd) = sfc_alb_dif_vis(icol); - - } else if (!is_visible_wave1 && !is_visible_wave2) { - - // Entire band is in the longwave (near-infrared) - sfc_alb_dir(icol,ibnd) = sfc_alb_dir_nir(icol); - sfc_alb_dif(icol,ibnd) = sfc_alb_dif_nir(icol); - - } else { - - // Band straddles the visible to near-infrared transition, so we take - // the albedo to be the average of the visible and near-infrared - // broadband albedos - sfc_alb_dir(icol,ibnd) = 0.5*(sfc_alb_dir_vis(icol) + sfc_alb_dir_nir(icol)); - sfc_alb_dif(icol,ibnd) = 0.5*(sfc_alb_dif_vis(icol) + sfc_alb_dif_nir(icol)); - - } - })); -} - -void compute_broadband_surface_fluxes( - const int ncol, const int ktop, const int nswbands, - real3d &sw_bnd_flux_dir , real3d &sw_bnd_flux_dif , - real1d &sfc_flux_dir_vis, real1d &sfc_flux_dir_nir, - real1d &sfc_flux_dif_vis, real1d &sfc_flux_dif_nir) { - // Band 10 straddles the near-IR and visible, so divide contributions from band 10 between both broadband sums - // TODO: Hard-coding these band indices is really bad practice. If the bands ever were to change (like when - // the RRTMG bands were re-ordered for RRTMGP), we would be using the wrong bands for the IR and UV/VIS. This - // should be refactored to grab the correct bands by specifying appropriate wavenumber rather than index. - //sfc_flux_dir_nir(i) = sum(sw_bnd_flux_dir(i+1,kbot,1:9)) + 0.5 * sw_bnd_flux_dir(i+1,kbot,10); - //sfc_flux_dir_vis(i) = sum(sw_bnd_flux_dir(i+1,kbot,11:14)) + 0.5 * sw_bnd_flux_dir(i+1,kbot,10); - //sfc_flux_dif_nir(i) = sum(sw_bnd_flux_dif(i+1,kbot,1:9)) + 0.5 * sw_bnd_flux_dif(i+1,kbot,10); - //sfc_flux_dif_vis(i) = sum(sw_bnd_flux_dif(i+1,kbot,11:14)) + 0.5 * sw_bnd_flux_dif(i+1,kbot,10); - - // Initialize sums over bands - memset(sfc_flux_dir_nir, 0); - memset(sfc_flux_dir_vis, 0); - memset(sfc_flux_dif_nir, 0); - memset(sfc_flux_dif_vis, 0); - - // Threshold between visible and infrared is 0.7 micron, or 14286 cm^-1. - const real visible_wavenumber_threshold = 14286; - auto wavenumber_limits = k_dist_sw.get_band_lims_wavenumber(); - TIMED_KERNEL(parallel_for(SimpleBounds<1>(ncol), YAKL_LAMBDA(const int icol) { - for (int ibnd = 1; ibnd <= nswbands; ++ibnd) { - // Wavenumber is in the visible if it is above the visible wavenumber - // threshold, and in the infrared if it is below the threshold - const bool is_visible_wave1 = (wavenumber_limits(1, ibnd) > visible_wavenumber_threshold ? true : false); - const bool is_visible_wave2 = (wavenumber_limits(2, ibnd) > visible_wavenumber_threshold ? true : false); - - if (is_visible_wave1 && is_visible_wave2) { - - // Entire band is in the visible - sfc_flux_dir_vis(icol) += sw_bnd_flux_dir(icol,ktop,ibnd); - sfc_flux_dif_vis(icol) += sw_bnd_flux_dif(icol,ktop,ibnd); - - } else if (!is_visible_wave1 && !is_visible_wave2) { - - // Entire band is in the longwave (near-infrared) - sfc_flux_dir_nir(icol) += sw_bnd_flux_dir(icol,ktop,ibnd); - sfc_flux_dif_nir(icol) += sw_bnd_flux_dif(icol,ktop,ibnd); - - } else { - - // Band straddles the visible to near-infrared transition, so put half - // the flux in visible and half in near-infrared fluxes - sfc_flux_dir_vis(icol) += 0.5 * sw_bnd_flux_dir(icol,ktop,ibnd); - sfc_flux_dif_vis(icol) += 0.5 * sw_bnd_flux_dif(icol,ktop,ibnd); - sfc_flux_dir_nir(icol) += 0.5 * sw_bnd_flux_dir(icol,ktop,ibnd); - sfc_flux_dif_nir(icol) += 0.5 * sw_bnd_flux_dif(icol,ktop,ibnd); - } - } - })); -} - -void rrtmgp_main( - const int ncol, const int nlay, - real2d &p_lay, real2d &t_lay, real2d &p_lev, real2d &t_lev, - GasConcs &gas_concs, - real2d &sfc_alb_dir, real2d &sfc_alb_dif, real1d &mu0, - real2d &lwp, real2d &iwp, real2d &rel, real2d &rei, real2d &cldfrac, - real3d &aer_tau_sw, real3d &aer_ssa_sw, real3d &aer_asm_sw, real3d &aer_tau_lw, - real3d &cld_tau_sw_bnd, real3d &cld_tau_lw_bnd, - real3d &cld_tau_sw_gpt, - real3d &cld_tau_lw_gpt, - real2d &sw_flux_up, real2d &sw_flux_dn, real2d &sw_flux_dn_dir, - real2d &lw_flux_up, real2d &lw_flux_dn, - real2d &sw_clnclrsky_flux_up, real2d &sw_clnclrsky_flux_dn, real2d &sw_clnclrsky_flux_dn_dir, - real2d &sw_clrsky_flux_up, real2d &sw_clrsky_flux_dn, real2d &sw_clrsky_flux_dn_dir, - real2d &sw_clnsky_flux_up, real2d &sw_clnsky_flux_dn, real2d &sw_clnsky_flux_dn_dir, - real2d &lw_clnclrsky_flux_up, real2d &lw_clnclrsky_flux_dn, - real2d &lw_clrsky_flux_up, real2d &lw_clrsky_flux_dn, - real2d &lw_clnsky_flux_up, real2d &lw_clnsky_flux_dn, - real3d &sw_bnd_flux_up, real3d &sw_bnd_flux_dn, real3d &sw_bnd_flux_dn_dir, - real3d &lw_bnd_flux_up, real3d &lw_bnd_flux_dn, - const Real tsi_scaling, - const std::shared_ptr& logger, - const bool extra_clnclrsky_diag, const bool extra_clnsky_diag) { - -#ifdef SCREAM_RRTMGP_DEBUG - // Sanity check inputs, and possibly repair - check_range(t_lay , k_dist_sw.get_temp_min(), k_dist_sw.get_temp_max(), "rrtmgp_main::t_lay"); - check_range(t_lev , k_dist_sw.get_temp_min(), k_dist_sw.get_temp_max(), "rrtmgp_main::t_lev"); - check_range(p_lay , k_dist_sw.get_press_min(), k_dist_sw.get_press_max(), "rrtmgp_main::p_lay"); - check_range(p_lev , k_dist_sw.get_press_min(), k_dist_sw.get_press_max(), "rrtmgp_main::p_lev"); - check_range(sfc_alb_dir, 0, 1, "rrtmgp_main::sfc_alb_dir"); - check_range(sfc_alb_dif, 0, 1, "rrtmgp_main::sfc_alb_dif"); - check_range(mu0 , 0, 1, "rrtmgp_main::mu0"); - check_range(lwp , 0, std::numeric_limits::max(), "rrtmgp_main::lwp"); - check_range(iwp , 0, std::numeric_limits::max(), "rrtmgp_main::iwp"); - check_range(rel , 0, std::numeric_limits::max(), "rrtmgp_main::rel"); - check_range(rei , 0, std::numeric_limits::max(), "rrtmgp_main::rei"); -#endif - - // Setup pointers to RRTMGP SW fluxes - FluxesByband fluxes_sw; - fluxes_sw.flux_up = sw_flux_up; - fluxes_sw.flux_dn = sw_flux_dn; - fluxes_sw.flux_dn_dir = sw_flux_dn_dir; - fluxes_sw.bnd_flux_up = sw_bnd_flux_up; - fluxes_sw.bnd_flux_dn = sw_bnd_flux_dn; - fluxes_sw.bnd_flux_dn_dir = sw_bnd_flux_dn_dir; - // Clean-clear-sky - FluxesBroadband clnclrsky_fluxes_sw; - clnclrsky_fluxes_sw.flux_up = sw_clnclrsky_flux_up; - clnclrsky_fluxes_sw.flux_dn = sw_clnclrsky_flux_dn; - clnclrsky_fluxes_sw.flux_dn_dir = sw_clnclrsky_flux_dn_dir; - // Clear-sky - FluxesBroadband clrsky_fluxes_sw; - clrsky_fluxes_sw.flux_up = sw_clrsky_flux_up; - clrsky_fluxes_sw.flux_dn = sw_clrsky_flux_dn; - clrsky_fluxes_sw.flux_dn_dir = sw_clrsky_flux_dn_dir; - // Clean-sky - FluxesBroadband clnsky_fluxes_sw; - clnsky_fluxes_sw.flux_up = sw_clnsky_flux_up; - clnsky_fluxes_sw.flux_dn = sw_clnsky_flux_dn; - clnsky_fluxes_sw.flux_dn_dir = sw_clnsky_flux_dn_dir; - - // Setup pointers to RRTMGP LW fluxes - FluxesByband fluxes_lw; - fluxes_lw.flux_up = lw_flux_up; - fluxes_lw.flux_dn = lw_flux_dn; - fluxes_lw.bnd_flux_up = lw_bnd_flux_up; - fluxes_lw.bnd_flux_dn = lw_bnd_flux_dn; - // Clean-clear-sky - FluxesBroadband clnclrsky_fluxes_lw; - clnclrsky_fluxes_lw.flux_up = lw_clnclrsky_flux_up; - clnclrsky_fluxes_lw.flux_dn = lw_clnclrsky_flux_dn; - // Clear-sky - FluxesBroadband clrsky_fluxes_lw; - clrsky_fluxes_lw.flux_up = lw_clrsky_flux_up; - clrsky_fluxes_lw.flux_dn = lw_clrsky_flux_dn; - // Clean-sky - FluxesBroadband clnsky_fluxes_lw; - clnsky_fluxes_lw.flux_up = lw_clnsky_flux_up; - clnsky_fluxes_lw.flux_dn = lw_clnsky_flux_dn; - - auto nswbands = k_dist_sw.get_nband(); - auto nlwbands = k_dist_lw.get_nband(); - - // Setup aerosol optical properties - OpticalProps2str aerosol_sw; - OpticalProps1scl aerosol_lw; - aerosol_sw.init(k_dist_sw.get_band_lims_wavenumber()); - aerosol_sw.alloc_2str(ncol, nlay); - TIMED_KERNEL(parallel_for(SimpleBounds<3>(nswbands,nlay,ncol) , YAKL_LAMBDA (int ibnd, int ilay, int icol) { - aerosol_sw.tau(icol,ilay,ibnd) = aer_tau_sw(icol,ilay,ibnd); - aerosol_sw.ssa(icol,ilay,ibnd) = aer_ssa_sw(icol,ilay,ibnd); - aerosol_sw.g (icol,ilay,ibnd) = aer_asm_sw(icol,ilay,ibnd); - })); - aerosol_lw.init(k_dist_lw.get_band_lims_wavenumber()); - aerosol_lw.alloc_1scl(ncol, nlay); - TIMED_KERNEL(parallel_for(SimpleBounds<3>(nlwbands,nlay,ncol) , YAKL_LAMBDA (int ibnd, int ilay, int icol) { - aerosol_lw.tau(icol,ilay,ibnd) = aer_tau_lw(icol,ilay,ibnd); - })); - -#ifdef SCREAM_RRTMGP_DEBUG - // Check aerosol optical properties - // NOTE: these should already have been checked by precondition checks, but someday we might have - // non-trivial aerosol optics, so this is still good to do here. - check_range(aerosol_sw.tau, 0, 1e3, "rrtmgp_main:aerosol_sw.tau"); - check_range(aerosol_sw.ssa, 0, 1, "rrtmgp_main:aerosol_sw.ssa"); //, "aerosol_optics_sw.ssa"); - check_range(aerosol_sw.g , -1, 1, "rrtmgp_main:aerosol_sw.g "); //, "aerosol_optics_sw.g" ); - check_range(aerosol_lw.tau, 0, 1e3, "rrtmgp_main:aerosol_lw.tau"); -#endif - - // Convert cloud physical properties to optical properties for input to RRTMGP - OpticalProps2str clouds_sw = get_cloud_optics_sw(ncol, nlay, cloud_optics_sw, k_dist_sw, lwp, iwp, rel, rei); - OpticalProps1scl clouds_lw = get_cloud_optics_lw(ncol, nlay, cloud_optics_lw, k_dist_lw, lwp, iwp, rel, rei); - clouds_sw.tau.deep_copy_to(cld_tau_sw_bnd); - clouds_lw.tau.deep_copy_to(cld_tau_lw_bnd); - - // Do subcolumn sampling to map bands -> gpoints based on cloud fraction and overlap assumption; - // This implements the Monte Carlo Independing Column Approximation by mapping only a single - // subcolumn (cloud state) to each gpoint. - auto nswgpts = k_dist_sw.get_ngpt(); - auto clouds_sw_gpt = get_subsampled_clouds(ncol, nlay, nswbands, nswgpts, clouds_sw, k_dist_sw, cldfrac, p_lay); - // Longwave - auto nlwgpts = k_dist_lw.get_ngpt(); - auto clouds_lw_gpt = get_subsampled_clouds(ncol, nlay, nlwbands, nlwgpts, clouds_lw, k_dist_lw, cldfrac, p_lay); - - // Copy cloud properties to outputs (is this needed, or can we just use pointers?) - // Alternatively, just compute and output a subcolumn cloud mask - TIMED_KERNEL(parallel_for(SimpleBounds<3>(nswgpts, nlay, ncol), YAKL_LAMBDA (int igpt, int ilay, int icol) { - cld_tau_sw_gpt(icol,ilay,igpt) = clouds_sw_gpt.tau(icol,ilay,igpt); - })); - TIMED_KERNEL(parallel_for(SimpleBounds<3>(nlwgpts, nlay, ncol), YAKL_LAMBDA (int igpt, int ilay, int icol) { - cld_tau_lw_gpt(icol,ilay,igpt) = clouds_lw_gpt.tau(icol,ilay,igpt); - })); - -#ifdef SCREAM_RRTMGP_DEBUG - // Perform checks on optics; these would be caught by RRTMGP_EXPENSIVE_CHECKS in the RRTMGP code, - // but we might want to provide additional debug info here. NOTE: we may actually want to move this - // up higher in the code, I think optical props should go up higher since optical props are kind of - // a parameterization of their own, and we might want to swap different choices. These checks go here - // only because we need to run them on computed optical props, so if the optical props themselves get - // computed up higher, then perform these checks higher as well - check_range(clouds_sw.tau, 0, std::numeric_limits::max(), "rrtmgp_main:clouds_sw.tau"); - check_range(clouds_sw.ssa, 0, 1, "rrtmgp_main:clouds_sw.ssa"); - check_range(clouds_sw.g , -1, 1, "rrtmgp_main:clouds_sw.g "); - check_range(clouds_sw.tau, 0, std::numeric_limits::max(), "rrtmgp_main:clouds_sw.tau"); -#endif - - // Do shortwave - rrtmgp_sw( - ncol, nlay, - k_dist_sw, p_lay, t_lay, p_lev, t_lev, gas_concs, - sfc_alb_dir, sfc_alb_dif, mu0, aerosol_sw, clouds_sw_gpt, - fluxes_sw, clnclrsky_fluxes_sw, clrsky_fluxes_sw, clnsky_fluxes_sw, - tsi_scaling, logger, - extra_clnclrsky_diag, extra_clnsky_diag - ); - - // Do longwave - rrtmgp_lw( - ncol, nlay, - k_dist_lw, p_lay, t_lay, p_lev, t_lev, gas_concs, - aerosol_lw, clouds_lw_gpt, - fluxes_lw, clnclrsky_fluxes_lw, clrsky_fluxes_lw, clnsky_fluxes_lw, - extra_clnclrsky_diag, extra_clnsky_diag - ); - -} - -int3d get_subcolumn_mask(const int ncol, const int nlay, const int ngpt, real2d &cldf, const int overlap_option, int1d &seeds) { - - // Routine will return subcolumn mask with values of 0 indicating no cloud, 1 indicating cloud - auto subcolumn_mask = int3d("subcolumn_mask", ncol, nlay, ngpt); - - // Subcolumn generators are a means for producing a variable x(i,j,k), where - // - // c(i,j,k) = 1 for x(i,j,k) > 1 - cldf(i,j) - // c(i,j,k) = 0 for x(i,j,k) <= 1 - cldf(i,j) - // - // I am going to call this "cldx" to be just slightly less ambiguous - auto cldx = real3d("cldx", ncol, nlay, ngpt); - - // Apply overlap assumption to set cldx - if (overlap_option == 0) { // Dummy mask, always cloudy - memset(cldx, 1); - } else { // Default case, maximum-random overlap - // Maximum-random overlap: - // Uses essentially the algorithm described in eq (14) in Raisanen et al. 2004, - // https://rmets.onlinelibrary.wiley.com/doi/epdf/10.1256/qj.03.99. Also the same - // algorithm used in RRTMG implementation of maximum-random overlap (see - // https://github.com/AER-RC/RRTMG_SW/blob/master/src/mcica_subcol_gen_sw.f90) - // - // First, fill cldx with random numbers. Need to use a unique seed for each column! - TIMED_KERNEL(parallel_for(SimpleBounds<1>(ncol), YAKL_LAMBDA(int icol) { - yakl::Random rand(seeds(icol)); - for (int igpt = 1; igpt <= ngpt; igpt++) { - for (int ilay = 1; ilay <= nlay; ilay++) { - cldx(icol,ilay,igpt) = rand.genFP(); - } - } - })); - // Step down columns and apply algorithm from eq (14) - TIMED_KERNEL(parallel_for(SimpleBounds<2>(ngpt,ncol), YAKL_LAMBDA(int igpt, int icol) { - for (int ilay = 2; ilay <= nlay; ilay++) { - // Check cldx in level above and see if it satisfies conditions to create a cloudy subcolumn - if (cldx(icol,ilay-1,igpt) > 1.0 - cldf(icol,ilay-1)) { - // Cloudy subcolumn above, use same random number here so that clouds in these two adjacent - // layers are maximimally overlapped - cldx(icol,ilay,igpt) = cldx(icol,ilay-1,igpt); - } else { - // Cloud-less above, use new random number so that clouds are distributed - // randomly in this layer. Need to scale new random number to range - // [0, 1.0 - cldf(ilay-1)] because we have artifically changed the distribution - // of random numbers in this layer with the above branch of the conditional, - // which would otherwise inflate cloud fraction in this layer. - cldx(icol,ilay,igpt) = cldx(icol,ilay ,igpt) * (1.0 - cldf(icol,ilay-1)); - } - } - })); - } - - // Use cldx array to create subcolumn mask - TIMED_KERNEL(parallel_for(SimpleBounds<3>(ngpt,nlay,ncol), YAKL_LAMBDA(int igpt, int ilay, int icol) { - if (cldx(icol,ilay,igpt) > 1.0 - cldf(icol,ilay)) { - subcolumn_mask(icol,ilay,igpt) = 1; - } else { - subcolumn_mask(icol,ilay,igpt) = 0; - } - })); - return subcolumn_mask; -} - -void rrtmgp_sw( - const int ncol, const int nlay, - GasOpticsRRTMGP &k_dist, - real2d &p_lay, real2d &t_lay, real2d &p_lev, real2d &t_lev, - GasConcs &gas_concs, - real2d &sfc_alb_dir, real2d &sfc_alb_dif, real1d &mu0, - OpticalProps2str &aerosol, OpticalProps2str &clouds, - FluxesByband &fluxes, FluxesBroadband &clnclrsky_fluxes, FluxesBroadband &clrsky_fluxes, FluxesBroadband &clnsky_fluxes, - const Real tsi_scaling, - const std::shared_ptr& logger, - const bool extra_clnclrsky_diag, const bool extra_clnsky_diag) { - - // Get problem sizes - int nbnd = k_dist.get_nband(); - int ngpt = k_dist.get_ngpt(); - int ngas = gas_concs.get_num_gases(); - - // Associate local pointers for fluxes - auto &flux_up = fluxes.flux_up; - auto &flux_dn = fluxes.flux_dn; - auto &flux_dn_dir = fluxes.flux_dn_dir; - auto &bnd_flux_up = fluxes.bnd_flux_up; - auto &bnd_flux_dn = fluxes.bnd_flux_dn; - auto &bnd_flux_dn_dir = fluxes.bnd_flux_dn_dir; - auto &clnclrsky_flux_up = clnclrsky_fluxes.flux_up; - auto &clnclrsky_flux_dn = clnclrsky_fluxes.flux_dn; - auto &clnclrsky_flux_dn_dir = clnclrsky_fluxes.flux_dn_dir; - auto &clrsky_flux_up = clrsky_fluxes.flux_up; - auto &clrsky_flux_dn = clrsky_fluxes.flux_dn; - auto &clrsky_flux_dn_dir = clrsky_fluxes.flux_dn_dir; - auto &clnsky_flux_up = clnsky_fluxes.flux_up; - auto &clnsky_flux_dn = clnsky_fluxes.flux_dn; - auto &clnsky_flux_dn_dir = clnsky_fluxes.flux_dn_dir; - - // Reset fluxes to zero - TIMED_KERNEL(parallel_for(SimpleBounds<2>(nlay+1,ncol), YAKL_LAMBDA(int ilev, int icol) { - flux_up (icol,ilev) = 0; - flux_dn (icol,ilev) = 0; - flux_dn_dir(icol,ilev) = 0; - clnclrsky_flux_up (icol,ilev) = 0; - clnclrsky_flux_dn (icol,ilev) = 0; - clnclrsky_flux_dn_dir(icol,ilev) = 0; - clrsky_flux_up (icol,ilev) = 0; - clrsky_flux_dn (icol,ilev) = 0; - clrsky_flux_dn_dir(icol,ilev) = 0; - clnsky_flux_up (icol,ilev) = 0; - clnsky_flux_dn (icol,ilev) = 0; - clnsky_flux_dn_dir(icol,ilev) = 0; - })); - TIMED_KERNEL(parallel_for(SimpleBounds<3>(nbnd,nlay+1,ncol), YAKL_LAMBDA(int ibnd, int ilev, int icol) { - bnd_flux_up (icol,ilev,ibnd) = 0; - bnd_flux_dn (icol,ilev,ibnd) = 0; - bnd_flux_dn_dir(icol,ilev,ibnd) = 0; - })); - - // Get daytime indices - auto dayIndices = int1d("dayIndices", ncol); - memset(dayIndices, -1); - // Loop below has to be done on host, so create host copies - // TODO: there is probably a way to do this on the device - auto dayIndices_h = dayIndices.createHostCopy(); - auto mu0_h = mu0.createHostCopy(); - int nday = 0; - for (int icol = 1; icol <= ncol; icol++) { - if (mu0_h(icol) > 0) { - nday++; - dayIndices_h(nday) = icol; - } - } - - // Copy data back to the device - dayIndices_h.deep_copy_to(dayIndices); - if (nday == 0) { - // No daytime columns in this chunk, skip the rest of this routine - return; - } - - // Subset mu0 - auto mu0_day = real1d("mu0_day", nday); - TIMED_KERNEL(parallel_for(SimpleBounds<1>(nday), YAKL_LAMBDA(int iday) { - mu0_day(iday) = mu0(dayIndices(iday)); - })); - - // subset state variables - auto p_lay_day = real2d("p_lay_day", nday, nlay); - auto t_lay_day = real2d("t_lay_day", nday, nlay); - TIMED_KERNEL(parallel_for(SimpleBounds<2>(nlay,nday), YAKL_LAMBDA(int ilay, int iday) { - p_lay_day(iday,ilay) = p_lay(dayIndices(iday),ilay); - t_lay_day(iday,ilay) = t_lay(dayIndices(iday),ilay); - })); - auto p_lev_day = real2d("p_lev_day", nday, nlay+1); - auto t_lev_day = real2d("t_lev_day", nday, nlay+1); - TIMED_KERNEL(parallel_for(SimpleBounds<2>(nlay+1,nday), YAKL_LAMBDA(int ilev, int iday) { - p_lev_day(iday,ilev) = p_lev(dayIndices(iday),ilev); - t_lev_day(iday,ilev) = t_lev(dayIndices(iday),ilev); - })); - - // Subset gases - auto gas_names = gas_concs.get_gas_names(); - GasConcs gas_concs_day; - gas_concs_day.init(gas_names, nday, nlay); - for (int igas = 0; igas < ngas; igas++) { - auto vmr_day = real2d("vmr_day", nday, nlay); - auto vmr = real2d("vmr" , ncol, nlay); - gas_concs.get_vmr(gas_names[igas], vmr); - TIMED_KERNEL(parallel_for(SimpleBounds<2>(nlay,nday), YAKL_LAMBDA(int ilay, int iday) { - vmr_day(iday,ilay) = vmr(dayIndices(iday),ilay); - })); - gas_concs_day.set_vmr(gas_names[igas], vmr_day); - } - - // Subset aerosol optics - OpticalProps2str aerosol_day; - aerosol_day.init(k_dist.get_band_lims_wavenumber()); - aerosol_day.alloc_2str(nday, nlay); - TIMED_KERNEL(parallel_for(SimpleBounds<3>(nbnd,nlay,nday), YAKL_LAMBDA(int ibnd, int ilay, int iday) { - aerosol_day.tau(iday,ilay,ibnd) = aerosol.tau(dayIndices(iday),ilay,ibnd); - aerosol_day.ssa(iday,ilay,ibnd) = aerosol.ssa(dayIndices(iday),ilay,ibnd); - aerosol_day.g (iday,ilay,ibnd) = aerosol.g (dayIndices(iday),ilay,ibnd); - })); - - // Subset cloud optics - // TODO: nbnd -> ngpt once we pass sub-sampled cloud state - OpticalProps2str clouds_day; - clouds_day.init(k_dist.get_band_lims_wavenumber(), k_dist.get_band_lims_gpoint()); - clouds_day.alloc_2str(nday, nlay); - TIMED_KERNEL(parallel_for(SimpleBounds<3>(ngpt,nlay,nday), YAKL_LAMBDA(int igpt, int ilay, int iday) { - clouds_day.tau(iday,ilay,igpt) = clouds.tau(dayIndices(iday),ilay,igpt); - clouds_day.ssa(iday,ilay,igpt) = clouds.ssa(dayIndices(iday),ilay,igpt); - clouds_day.g (iday,ilay,igpt) = clouds.g (dayIndices(iday),ilay,igpt); - })); - - // RRTMGP assumes surface albedos have a screwy dimension ordering - // for some strange reason, so we need to transpose these; also do - // daytime subsetting in the same kernel - real2d sfc_alb_dir_T("sfc_alb_dir", nbnd, nday); - real2d sfc_alb_dif_T("sfc_alb_dif", nbnd, nday); - TIMED_KERNEL(parallel_for(SimpleBounds<2>(nbnd,nday), YAKL_LAMBDA(int ibnd, int icol) { - sfc_alb_dir_T(ibnd,icol) = sfc_alb_dir(dayIndices(icol),ibnd); - sfc_alb_dif_T(ibnd,icol) = sfc_alb_dif(dayIndices(icol),ibnd); - })); - - // Temporaries we need for daytime-only fluxes - auto flux_up_day = real2d("flux_up_day", nday, nlay+1); - auto flux_dn_day = real2d("flux_dn_day", nday, nlay+1); - auto flux_dn_dir_day = real2d("flux_dn_dir_day", nday, nlay+1); - auto bnd_flux_up_day = real3d("bnd_flux_up_day", nday, nlay+1, nbnd); - auto bnd_flux_dn_day = real3d("bnd_flux_dn_day", nday, nlay+1, nbnd); - auto bnd_flux_dn_dir_day = real3d("bnd_flux_dn_dir_day", nday, nlay+1, nbnd); - FluxesByband fluxes_day; - fluxes_day.flux_up = flux_up_day; - fluxes_day.flux_dn = flux_dn_day; - fluxes_day.flux_dn_dir = flux_dn_dir_day; - fluxes_day.bnd_flux_up = bnd_flux_up_day; - fluxes_day.bnd_flux_dn = bnd_flux_dn_day; - fluxes_day.bnd_flux_dn_dir = bnd_flux_dn_dir_day; - - // Allocate space for optical properties - OpticalProps2str optics; - optics.alloc_2str(nday, nlay, k_dist); - - OpticalProps2str optics_no_aerosols; - if (extra_clnsky_diag) { - // Allocate space for optical properties (no aerosols) - optics_no_aerosols.alloc_2str(nday, nlay, k_dist); - } - - // Limit temperatures for gas optics look-up tables - auto t_lay_limited = real2d("t_lay_limited", nday, nlay); - limit_to_bounds(t_lay_day, k_dist_sw.get_temp_min(), k_dist_sw.get_temp_max(), t_lay_limited); - - // Do gas optics - real2d toa_flux("toa_flux", nday, ngpt); - auto p_lay_host = p_lay.createHostCopy(); - bool top_at_1 = p_lay_host(1, 1) < p_lay_host(1, nlay); - - k_dist.gas_optics(nday, nlay, top_at_1, p_lay_day, p_lev_day, t_lay_limited, gas_concs_day, optics, toa_flux); - if (extra_clnsky_diag) { - k_dist.gas_optics(nday, nlay, top_at_1, p_lay_day, p_lev_day, t_lay_limited, gas_concs_day, optics_no_aerosols, toa_flux); - } - -#ifdef SCREAM_RRTMGP_DEBUG - // Check gas optics - check_range(optics.tau, 0, std::numeric_limits::max(), "rrtmgp_sw:optics.tau"); - check_range(optics.ssa, 0, 1, "rrtmgp_sw:optics.ssa"); //, "optics.ssa"); - check_range(optics.g , -1, 1, "rrtmgp_sw:optics.g "); //, "optics.g" ); -#endif - - // Apply tsi_scaling - TIMED_KERNEL(parallel_for(SimpleBounds<2>(ngpt,nday), YAKL_LAMBDA(int igpt, int iday) { - toa_flux(iday,igpt) = tsi_scaling * toa_flux(iday,igpt); - })); - - if (extra_clnclrsky_diag) { - // Compute clear-clean-sky (just gas) fluxes on daytime columns - rte_sw(optics, top_at_1, mu0_day, toa_flux, sfc_alb_dir_T, sfc_alb_dif_T, fluxes_day); - // Expand daytime fluxes to all columns - TIMED_KERNEL(parallel_for(SimpleBounds<2>(nlay+1,nday), YAKL_LAMBDA(int ilev, int iday) { - int icol = dayIndices(iday); - clnclrsky_flux_up (icol,ilev) = flux_up_day (iday,ilev); - clnclrsky_flux_dn (icol,ilev) = flux_dn_day (iday,ilev); - clnclrsky_flux_dn_dir(icol,ilev) = flux_dn_dir_day(iday,ilev); - })); - } - - // Combine gas and aerosol optics - aerosol_day.delta_scale(); - aerosol_day.increment(optics); - - // Compute clearsky (gas + aerosol) fluxes on daytime columns - rte_sw(optics, top_at_1, mu0_day, toa_flux, sfc_alb_dir_T, sfc_alb_dif_T, fluxes_day); - - // Expand daytime fluxes to all columns - TIMED_KERNEL(parallel_for(SimpleBounds<2>(nlay+1,nday), YAKL_LAMBDA(int ilev, int iday) { - int icol = dayIndices(iday); - clrsky_flux_up (icol,ilev) = flux_up_day (iday,ilev); - clrsky_flux_dn (icol,ilev) = flux_dn_day (iday,ilev); - clrsky_flux_dn_dir(icol,ilev) = flux_dn_dir_day(iday,ilev); - })); - - // Now merge in cloud optics and do allsky calculations - - // Combine gas and cloud optics - clouds_day.delta_scale(); - clouds_day.increment(optics); - // Compute fluxes on daytime columns - rte_sw(optics, top_at_1, mu0_day, toa_flux, sfc_alb_dir_T, sfc_alb_dif_T, fluxes_day); - // Expand daytime fluxes to all columns - TIMED_KERNEL(parallel_for(SimpleBounds<2>(nlay+1,nday), YAKL_LAMBDA(int ilev, int iday) { - int icol = dayIndices(iday); - flux_up (icol,ilev) = flux_up_day (iday,ilev); - flux_dn (icol,ilev) = flux_dn_day (iday,ilev); - flux_dn_dir(icol,ilev) = flux_dn_dir_day(iday,ilev); - })); - TIMED_KERNEL(parallel_for(SimpleBounds<3>(nbnd,nlay+1,nday), YAKL_LAMBDA(int ibnd, int ilev, int iday) { - int icol = dayIndices(iday); - bnd_flux_up (icol,ilev,ibnd) = bnd_flux_up_day (iday,ilev,ibnd); - bnd_flux_dn (icol,ilev,ibnd) = bnd_flux_dn_day (iday,ilev,ibnd); - bnd_flux_dn_dir(icol,ilev,ibnd) = bnd_flux_dn_dir_day(iday,ilev,ibnd); - })); - - if (extra_clnsky_diag) { - // First increment clouds in optics_no_aerosols - clouds_day.increment(optics_no_aerosols); - // Compute cleansky (gas + clouds) fluxes on daytime columns - rte_sw(optics_no_aerosols, top_at_1, mu0_day, toa_flux, sfc_alb_dir_T, sfc_alb_dif_T, fluxes_day); - // Expand daytime fluxes to all columns - TIMED_KERNEL(parallel_for(SimpleBounds<2>(nlay+1,nday), YAKL_LAMBDA(int ilev, int iday) { - int icol = dayIndices(iday); - clnsky_flux_up (icol,ilev) = flux_up_day (iday,ilev); - clnsky_flux_dn (icol,ilev) = flux_dn_day (iday,ilev); - clnsky_flux_dn_dir(icol,ilev) = flux_dn_dir_day(iday,ilev); - })); - } -} - -void rrtmgp_lw( - const int ncol, const int nlay, - GasOpticsRRTMGP &k_dist, - real2d &p_lay, real2d &t_lay, real2d &p_lev, real2d &t_lev, - GasConcs &gas_concs, - OpticalProps1scl &aerosol, - OpticalProps1scl &clouds, - FluxesByband &fluxes, FluxesBroadband &clnclrsky_fluxes, FluxesBroadband &clrsky_fluxes, FluxesBroadband &clnsky_fluxes, - const bool extra_clnclrsky_diag, const bool extra_clnsky_diag) { - - // Problem size - int nbnd = k_dist.get_nband(); - - // Associate local pointers for fluxes - auto &flux_up = fluxes.flux_up; - auto &flux_dn = fluxes.flux_dn; - auto &bnd_flux_up = fluxes.bnd_flux_up; - auto &bnd_flux_dn = fluxes.bnd_flux_dn; - auto &clnclrsky_flux_up = clnclrsky_fluxes.flux_up; - auto &clnclrsky_flux_dn = clnclrsky_fluxes.flux_dn; - auto &clrsky_flux_up = clrsky_fluxes.flux_up; - auto &clrsky_flux_dn = clrsky_fluxes.flux_dn; - auto &clnsky_flux_up = clnsky_fluxes.flux_up; - auto &clnsky_flux_dn = clnsky_fluxes.flux_dn; - - // Reset fluxes to zero - TIMED_KERNEL(parallel_for( - SimpleBounds<2>(nlay + 1, ncol), YAKL_LAMBDA(int ilev, int icol) { - flux_up(icol, ilev) = 0; - flux_dn(icol, ilev) = 0; - clnclrsky_flux_up(icol, ilev) = 0; - clnclrsky_flux_dn(icol, ilev) = 0; - clrsky_flux_up(icol, ilev) = 0; - clrsky_flux_dn(icol, ilev) = 0; - clnsky_flux_up(icol, ilev) = 0; - clnsky_flux_dn(icol, ilev) = 0; - })); - TIMED_KERNEL(parallel_for( - SimpleBounds<3>(nbnd, nlay + 1, ncol), - YAKL_LAMBDA(int ibnd, int ilev, int icol) { - bnd_flux_up(icol, ilev, ibnd) = 0; - bnd_flux_dn(icol, ilev, ibnd) = 0; - })); - - // Allocate space for optical properties - OpticalProps1scl optics; - optics.alloc_1scl(ncol, nlay, k_dist); - OpticalProps1scl optics_no_aerosols; - if (extra_clnsky_diag) { - // Allocate space for optical properties (no aerosols) - optics_no_aerosols.alloc_1scl(ncol, nlay, k_dist); - } - - // Boundary conditions - SourceFuncLW lw_sources; - lw_sources.alloc(ncol, nlay, k_dist); - real1d t_sfc ("t_sfc" ,ncol); - real2d emis_sfc("emis_sfc",nbnd,ncol); - - // Surface temperature - auto p_lay_host = p_lay.createHostCopy(); - bool top_at_1 = p_lay_host(1, 1) < p_lay_host(1, nlay); - TIMED_KERNEL(parallel_for(SimpleBounds<1>(ncol), YAKL_LAMBDA(int icol) { - t_sfc(icol) = t_lev(icol, merge(nlay+1, 1, top_at_1)); - })); - memset(emis_sfc , 0.98_wp); - - // Get Gaussian quadrature weights - // TODO: move this crap out of userland! - // Weights and angle secants for first order (k=1) Gaussian quadrature. - // Values from Table 2, Clough et al, 1992, doi:10.1029/92JD01419 - // after Abramowitz & Stegun 1972, page 921 - int constexpr max_gauss_pts = 4; - realHost2d gauss_Ds_host ("gauss_Ds" ,max_gauss_pts,max_gauss_pts); - gauss_Ds_host(1,1) = 1.66_wp ; gauss_Ds_host(2,1) = 0._wp; gauss_Ds_host(3,1) = 0._wp; gauss_Ds_host(4,1) = 0._wp; - gauss_Ds_host(1,2) = 1.18350343_wp; gauss_Ds_host(2,2) = 2.81649655_wp; gauss_Ds_host(3,2) = 0._wp; gauss_Ds_host(4,2) = 0._wp; - gauss_Ds_host(1,3) = 1.09719858_wp; gauss_Ds_host(2,3) = 1.69338507_wp; gauss_Ds_host(3,3) = 4.70941630_wp; gauss_Ds_host(4,3) = 0._wp; - gauss_Ds_host(1,4) = 1.06056257_wp; gauss_Ds_host(2,4) = 1.38282560_wp; gauss_Ds_host(3,4) = 2.40148179_wp; gauss_Ds_host(4,4) = 7.15513024_wp; - - realHost2d gauss_wts_host("gauss_wts",max_gauss_pts,max_gauss_pts); - gauss_wts_host(1,1) = 0.5_wp ; gauss_wts_host(2,1) = 0._wp ; gauss_wts_host(3,1) = 0._wp ; gauss_wts_host(4,1) = 0._wp ; - gauss_wts_host(1,2) = 0.3180413817_wp; gauss_wts_host(2,2) = 0.1819586183_wp; gauss_wts_host(3,2) = 0._wp ; gauss_wts_host(4,2) = 0._wp ; - gauss_wts_host(1,3) = 0.2009319137_wp; gauss_wts_host(2,3) = 0.2292411064_wp; gauss_wts_host(3,3) = 0.0698269799_wp; gauss_wts_host(4,3) = 0._wp ; - gauss_wts_host(1,4) = 0.1355069134_wp; gauss_wts_host(2,4) = 0.2034645680_wp; gauss_wts_host(3,4) = 0.1298475476_wp; gauss_wts_host(4,4) = 0.0311809710_wp; - - real2d gauss_Ds ("gauss_Ds" ,max_gauss_pts,max_gauss_pts); - real2d gauss_wts("gauss_wts",max_gauss_pts,max_gauss_pts); - gauss_Ds_host .deep_copy_to(gauss_Ds ); - gauss_wts_host.deep_copy_to(gauss_wts); - - // Limit temperatures for gas optics look-up tables - auto t_lay_limited = real2d("t_lay_limited", ncol, nlay); - auto t_lev_limited = real2d("t_lev_limited", ncol, nlay+1); - limit_to_bounds(t_lay, k_dist_lw.get_temp_min(), k_dist_lw.get_temp_max(), t_lay_limited); - limit_to_bounds(t_lev, k_dist_lw.get_temp_min(), k_dist_lw.get_temp_max(), t_lev_limited); - - // Do gas optics - k_dist.gas_optics(ncol, nlay, top_at_1, p_lay, p_lev, t_lay_limited, t_sfc, gas_concs, optics, lw_sources, real2d(), t_lev_limited); - if (extra_clnsky_diag) { - k_dist.gas_optics(ncol, nlay, top_at_1, p_lay, p_lev, t_lay_limited, t_sfc, gas_concs, optics_no_aerosols, lw_sources, real2d(), t_lev_limited); - } - -#ifdef SCREAM_RRTMGP_DEBUG - // Check gas optics - check_range(optics.tau, 0, std::numeric_limits::max(), "rrtmgp_lw:optics.tau"); -#endif - - if (extra_clnclrsky_diag) { - // Compute clean-clear-sky fluxes before we add in aerosols and clouds - rte_lw(max_gauss_pts, gauss_Ds, gauss_wts, optics, top_at_1, lw_sources, emis_sfc, clnclrsky_fluxes); - } - - // Combine gas and aerosol optics - aerosol.increment(optics); - - // Compute clear-sky fluxes before we add in clouds - rte_lw(max_gauss_pts, gauss_Ds, gauss_wts, optics, top_at_1, lw_sources, emis_sfc, clrsky_fluxes); - - // Combine gas and cloud optics - clouds.increment(optics); - - // Compute allsky fluxes - rte_lw(max_gauss_pts, gauss_Ds, gauss_wts, optics, top_at_1, lw_sources, emis_sfc, fluxes); - - if (extra_clnsky_diag) { - // First increment clouds in optics_no_aerosols - clouds.increment(optics_no_aerosols); - // Compute clean-sky fluxes - rte_lw(max_gauss_pts, gauss_Ds, gauss_wts, optics_no_aerosols, top_at_1, lw_sources, emis_sfc, clnsky_fluxes); - } - -} - -void compute_cloud_area( - int ncol, int nlay, int ngpt, const Real pmin, const Real pmax, - const real2d& pmid, const real3d& cld_tau_gpt, real1d& cld_area) { - // Subcolumn binary cld mask; if any layers with pressure between pmin and pmax are cloudy - // then 2d subcol mask is 1, otherwise it is 0 - auto subcol_mask = real2d("subcol_mask", ncol, ngpt); - memset(subcol_mask, 0); - TIMED_KERNEL(yakl::fortran::parallel_for(SimpleBounds<3>(ngpt, nlay, ncol), YAKL_LAMBDA(int igpt, int ilay, int icol) { - // NOTE: using plev would need to assume level ordering (top to bottom or bottom to top), but - // using play/pmid does not - if (cld_tau_gpt(icol,ilay,igpt) > 0 && pmid(icol,ilay) >= pmin && pmid(icol,ilay) < pmax) { - subcol_mask(icol,igpt) = 1; - } - })); - // Compute average over subcols to get cloud area - auto ngpt_inv = 1.0 / ngpt; - memset(cld_area, 0); - TIMED_KERNEL(yakl::fortran::parallel_for(SimpleBounds<1>(ncol), YAKL_LAMBDA(int icol) { - // This loop needs to be serial because of the atomic reduction - for (int igpt = 1; igpt <= ngpt; ++igpt) { - cld_area(icol) += subcol_mask(icol,igpt) * ngpt_inv; - } - })); -} - -int get_wavelength_index_sw(double wavelength) { return get_wavelength_index(k_dist_sw, wavelength); } - -int get_wavelength_index_lw(double wavelength) { return get_wavelength_index(k_dist_lw, wavelength); } - -int get_wavelength_index(OpticalProps &kdist, double wavelength) { - // Get wavelength bounds for all wavelength bands - auto wavelength_bounds = kdist.get_band_lims_wavelength(); - - // Find the band index for the specified wavelength - // Note that bands are stored in wavenumber space, units of cm-1, so if we are passed wavelength - // in units of meters, we need a conversion factor of 10^2 - int nbnds = kdist.get_nband(); - yakl::ScalarLiveOut band_index(-1); - TIMED_KERNEL(yakl::fortran::parallel_for(SimpleBounds<1>(nbnds), YAKL_LAMBDA(int ibnd) { - if (wavelength_bounds(1,ibnd) < wavelength_bounds(2,ibnd)) { - if (wavelength_bounds(1,ibnd) <= wavelength * 1e2 && wavelength * 1e2 <= wavelength_bounds(2,ibnd)) { - band_index = ibnd; - } - } else { - if (wavelength_bounds(1,ibnd) >= wavelength * 1e2 && wavelength * 1e2 >= wavelength_bounds(2,ibnd)) { - band_index = ibnd; - } - } - })); - return band_index.hostRead(); -} - -void compute_aerocom_cloudtop( - int ncol, int nlay, const real2d &tmid, const real2d &pmid, - const real2d &p_del, const real2d &z_del, const real2d &qc, - const real2d &qi, const real2d &rel, const real2d &rei, - const real2d &cldfrac_tot, const real2d &nc, - real1d &T_mid_at_cldtop, real1d &p_mid_at_cldtop, - real1d &cldfrac_ice_at_cldtop, real1d &cldfrac_liq_at_cldtop, - real1d &cldfrac_tot_at_cldtop, real1d &cdnc_at_cldtop, - real1d &eff_radius_qc_at_cldtop, real1d &eff_radius_qi_at_cldtop) { - /* The goal of this routine is to calculate properties at cloud top - * based on the AeroCom recommendation. See reference for routine - * get_subcolumn_mask above, where equation 14 is used for the - * maximum-random overlap assumption for subcolumn generation. We use - * equation 13, the column counterpart. - */ - // Set outputs to zero - memset(T_mid_at_cldtop, 0.0); - memset(p_mid_at_cldtop, 0.0); - memset(cldfrac_ice_at_cldtop, 0.0); - memset(cldfrac_liq_at_cldtop, 0.0); - memset(cldfrac_tot_at_cldtop, 0.0); - memset(cdnc_at_cldtop, 0.0); - memset(eff_radius_qc_at_cldtop, 0.0); - memset(eff_radius_qi_at_cldtop, 0.0); - // Initialize the 1D "clear fraction" as 1 (totally clear) - auto aerocom_clr = real1d("aerocom_clr", ncol); - memset(aerocom_clr, 1.0); - // Get gravity acceleration constant from constants - using physconst = scream::physics::Constants; - // TODO: move tunable constant to namelist - constexpr real q_threshold = 0.0; // BAD_CONSTANT! - // TODO: move tunable constant to namelist - constexpr real cldfrac_tot_threshold = 0.001; // BAD_CONSTANT! - // Loop over all columns in parallel - TIMED_KERNEL(yakl::fortran::parallel_for( - SimpleBounds<1>(ncol), YAKL_LAMBDA(int icol) { - // Loop over all layers in serial (due to accumulative - // product), starting at 2 (second highest) layer because the - // highest is assumed to hav no clouds - for(int ilay = 2; ilay <= nlay; ++ilay) { - // Only do the calculation if certain conditions are met - if((qc(icol, ilay) + qi(icol, ilay)) > q_threshold && - (cldfrac_tot(icol, ilay) > cldfrac_tot_threshold)) { - /* PART I: Probabilistically determining cloud top */ - // Populate aerocom_tmp as the clear-sky fraction - // probability of this level, where aerocom_clr is that of - // the previous level - auto aerocom_tmp = - aerocom_clr(icol) * - (1.0 - ekat::impl::max(cldfrac_tot(icol, ilay - 1), - cldfrac_tot(icol, ilay))) / - (1.0 - ekat::impl::min(cldfrac_tot(icol, ilay - 1), - 1.0 - cldfrac_tot_threshold)); - // Temporary variable for probability "weights" - auto aerocom_wts = aerocom_clr(icol) - aerocom_tmp; - // Temporary variable for liquid "phase" - auto aerocom_phi = - qc(icol, ilay) / (qc(icol, ilay) + qi(icol, ilay)); - /* PART II: The inferred properties */ - /* In general, converting a 3D property X to a 2D cloud-top - * counterpart x follows: x(i) += X(i,k) * weights * Phase - * but X and Phase are not always needed */ - // T_mid_at_cldtop - T_mid_at_cldtop(icol) += tmid(icol, ilay) * aerocom_wts; - // p_mid_at_cldtop - p_mid_at_cldtop(icol) += pmid(icol, ilay) * aerocom_wts; - // cldfrac_ice_at_cldtop - cldfrac_ice_at_cldtop(icol) += - (1.0 - aerocom_phi) * aerocom_wts; - // cldfrac_liq_at_cldtop - cldfrac_liq_at_cldtop(icol) += aerocom_phi * aerocom_wts; - // cdnc_at_cldtop - /* We need to convert nc from 1/mass to 1/volume first, and - * from grid-mean to in-cloud, but after that, the - * calculation follows the general logic */ - auto cdnc = nc(icol, ilay) * p_del(icol, ilay) / - z_del(icol, ilay) / physconst::gravit / - cldfrac_tot(icol, ilay); - cdnc_at_cldtop(icol) += cdnc * aerocom_phi * aerocom_wts; - // eff_radius_qc_at_cldtop - eff_radius_qc_at_cldtop(icol) += - rel(icol, ilay) * aerocom_phi * aerocom_wts; - // eff_radius_qi_at_cldtop - eff_radius_qi_at_cldtop(icol) += - rei(icol, ilay) * (1.0 - aerocom_phi) * aerocom_wts; - // Reset aerocom_clr to aerocom_tmp to accumulate - aerocom_clr(icol) = aerocom_tmp; - } - } - // After the serial loop over levels, the cloudy fraction is - // defined as (1 - aerocom_clr). This is true because - // aerocom_clr is the result of accumulative probabilities - // (their products) - cldfrac_tot_at_cldtop(icol) = 1.0 - aerocom_clr(icol); - })); -} - -} // namespace rrtmgp -#endif } // namespace scream diff --git a/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_interface.hpp b/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_interface.hpp index fd762b671d71..af0d935f4bcc 100644 --- a/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_interface.hpp +++ b/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_interface.hpp @@ -21,9 +21,7 @@ #include "ekat/logging/ekat_logger.hpp" #include "ekat/util/ekat_math_utils.hpp" -#ifdef RRTMGP_ENABLE_KOKKOS #include "Kokkos_Random.hpp" -#endif namespace scream { @@ -32,133 +30,7 @@ void finalize_kls(); namespace rrtmgp { -#ifdef RRTMGP_ENABLE_YAKL -extern GasOpticsRRTMGP k_dist_sw; -extern GasOpticsRRTMGP k_dist_lw; - -extern CloudOptics cloud_optics_sw; -extern CloudOptics cloud_optics_lw; - -extern bool initialized; - -void rrtmgp_initialize( - GasConcs &gas_concs, - const std::string& coefficients_file_sw, const std::string& coefficients_file_lw, - const std::string& cloud_optics_file_sw, const std::string& cloud_optics_file_lw, - const std::shared_ptr& logger); - -void compute_band_by_band_surface_albedos( - const int ncol, const int nswbands, - real1d &sfc_alb_dir_vis, real1d &sfc_alb_dir_nir, - real1d &sfc_alb_dif_vis, real1d &sfc_alb_dif_nir, - real2d &sfc_alb_dir, real2d &sfc_alb_dif); - -void compute_broadband_surface_fluxes( - const int ncol, const int ktop, const int nswbands, - real3d &sw_bnd_flux_dir , real3d &sw_bnd_flux_dif , - real1d &sfc_flux_dir_vis, real1d &sfc_flux_dir_nir, - real1d &sfc_flux_dif_vis, real1d &sfc_flux_dif_nir); - -void rrtmgp_main( - const int ncol, const int nlay, - real2d &p_lay, real2d &t_lay, real2d &p_lev, real2d &t_lev, - GasConcs &gas_concs, - real2d &sfc_alb_dir, real2d &sfc_alb_dif, real1d &mu0, - real2d &lwp, real2d &iwp, real2d &rel, real2d &rei, real2d &cldfrac, - real3d &aer_tau_sw, real3d &aer_ssa_sw, real3d &aer_asm_sw, real3d &aer_tau_lw, - real3d &cld_tau_sw_bnd, real3d &cld_tau_lw_bnd, - real3d &cld_tau_sw_gpt, real3d &cld_tau_lw_gpt, - real2d &sw_flux_up, real2d &sw_flux_dn, real2d &sw_flux_dn_dir, - real2d &lw_flux_up, real2d &lw_flux_dn, - real2d &sw_clnclrsky_flux_up, real2d &sw_clnclrsky_flux_dn, real2d &sw_clnclrsky_flux_dn_dir, - real2d &sw_clrsky_flux_up, real2d &sw_clrsky_flux_dn, real2d &sw_clrsky_flux_dn_dir, - real2d &sw_clnsky_flux_up, real2d &sw_clnsky_flux_dn, real2d &sw_clnsky_flux_dn_dir, - real2d &lw_clnclrsky_flux_up, real2d &lw_clnclrsky_flux_dn, - real2d &lw_clrsky_flux_up, real2d &lw_clrsky_flux_dn, - real2d &lw_clnsky_flux_up, real2d &lw_clnsky_flux_dn, - real3d &sw_bnd_flux_up, real3d &sw_bnd_flux_dn, real3d &sw_bnd_flux_dn_dir, - real3d &lw_bnd_flux_up, real3d &lw_bnd_flux_dn, - const Real tsi_scaling, - const std::shared_ptr& logger, - const bool extra_clnclrsky_diag = false, const bool extra_clnsky_diag = false); - -void rrtmgp_finalize(); - -void rrtmgp_sw( - const int ncol, const int nlay, - GasOpticsRRTMGP &k_dist, - real2d &p_lay, real2d &t_lay, real2d &p_lev, real2d &t_lev, - GasConcs &gas_concs, - real2d &sfc_alb_dir, real2d &sfc_alb_dif, real1d &mu0, - OpticalProps2str &aerosol, OpticalProps2str &clouds, - FluxesByband &fluxes, FluxesBroadband &clnclrsky_fluxes, FluxesBroadband &clrsky_fluxes, FluxesBroadband &clnsky_fluxes, - const Real tsi_scaling, - const std::shared_ptr& logger, - const bool extra_clnclrsky_diag, const bool extra_clnsky_diag); - -void rrtmgp_lw( - const int ncol, const int nlay, - GasOpticsRRTMGP &k_dist, - real2d &p_lay, real2d &t_lay, real2d &p_lev, real2d &t_lev, - GasConcs &gas_concs, - OpticalProps1scl &aerosol, OpticalProps1scl &clouds, - FluxesByband &fluxes, FluxesBroadband &clnclrsky_fluxes, FluxesBroadband &clrsky_fluxes, FluxesBroadband &clnsky_fluxes, - const bool extra_clnclrsky_diag, const bool extra_clnsky_diag); - -int3d get_subcolumn_mask(const int ncol, const int nlay, const int ngpt, real2d &cldf, const int overlap_option, int1d &seeds); - -void compute_cloud_area( - int ncol, int nlay, int ngpt, Real pmin, Real pmax, - const real2d& pmid, const real3d& cld_tau_gpt, real1d& cld_area); - -void compute_aerocom_cloudtop( - int ncol, int nlay, const real2d &tmid, const real2d &pmid, - const real2d &p_del, const real2d &z_del, const real2d &qc, - const real2d &qi, const real2d &rel, const real2d &rei, - const real2d &cldfrac_tot, const real2d &nc, - real1d &T_mid_at_cldtop, real1d &p_mid_at_cldtop, - real1d &cldfrac_ice_at_cldtop, real1d &cldfrac_liq_at_cldtop, - real1d &cldfrac_tot_at_cldtop, real1d &cdnc_at_cldtop, - real1d &eff_radius_qc_at_cldtop, real1d &eff_radius_qi_at_cldtop); - -template -void mixing_ratio_to_cloud_mass( - yakl::Array const &mixing_ratio, - yakl::Array const &cloud_fraction, - yakl::Array const &dp, - yakl::Array &cloud_mass) { - int ncol = mixing_ratio.dimension[0]; - int nlay = mixing_ratio.dimension[1]; - using physconst = scream::physics::Constants; - TIMED_KERNEL(yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<2>(nlay, ncol), YAKL_LAMBDA(int ilay, int icol) { - // Compute in-cloud mixing ratio (mixing ratio of the cloudy part of the layer) - // NOTE: these thresholds (from E3SM) seem arbitrary, but included here for consistency - // This limits in-cloud mixing ratio to 0.005 kg/kg. According to note in cloud_diagnostics - // in EAM, this is consistent with limits in MG2. Is this true for P3? - if (cloud_fraction(icol,ilay) > 0) { - // Compute layer-integrated cloud mass (per unit area) - auto incloud_mixing_ratio = std::min(mixing_ratio(icol,ilay) / std::max(0.0001, cloud_fraction(icol,ilay)), 0.005); - cloud_mass(icol,ilay) = incloud_mixing_ratio * dp(icol,ilay) / physconst::gravit; - } else { - cloud_mass(icol,ilay) = 0; - } - })); -} - -template -void limit_to_bounds(S const &arr_in, T const lower, T const upper, S &arr_out) { - TIMED_KERNEL(yakl::c::parallel_for(arr_in.totElems(), YAKL_LAMBDA(int i) { - arr_out.data()[i] = std::min(std::max(arr_in.data()[i], lower), upper); - })); -} - -int get_wavelength_index(OpticalProps &kdist, double wavelength); -int get_wavelength_index_sw(double wavelength); -int get_wavelength_index_lw(double wavelength); -#endif // RRTMGP_ENABLE_YAKL - // New interface for Kokkos and flexible types -#ifdef RRTMGP_ENABLE_KOKKOS template struct rrtmgp_interface { @@ -1636,7 +1508,6 @@ static optical_props1_t get_subsampled_clouds( } }; // struct rrtmgp_interface -#endif // RRTMGP_ENABLE_KOKKOS } // namespace rrtmgp } // namespace scream diff --git a/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.cpp b/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.cpp index 6b8f183d079b..d5abf2c4d3fb 100644 --- a/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.cpp +++ b/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.cpp @@ -13,9 +13,6 @@ #include "ekat/ekat_assert.hpp" #include "cpp/rrtmgp/mo_gas_concentrations.h" -#ifdef RRTMGP_ENABLE_YAKL -#include "YAKL.h" -#endif namespace scream { @@ -300,12 +297,7 @@ size_t RRTMGPRadiation::requested_buffer_size_in_bytes() const Buffer::num_3d_nlay_nswbands*m_col_chunk_size*(m_nlay)*m_nswbands + Buffer::num_3d_nlay_nlwbands*m_col_chunk_size*(m_nlay)*m_nlwbands + Buffer::num_3d_nlay_nswgpts*m_col_chunk_size*(m_nlay)*m_nswgpts + - Buffer::num_3d_nlay_nlwgpts*m_col_chunk_size*(m_nlay)*m_nlwgpts * -#if defined(RRTMGP_ENABLE_YAKL) && defined(RRTMGP_ENABLE_KOKKOS) - 2; -#else - 1; -#endif + Buffer::num_3d_nlay_nlwgpts*m_col_chunk_size*(m_nlay)*m_nlwgpts; return interface_request * sizeof(Real); } // RRTMGPRadiation::requested_buffer_size @@ -317,149 +309,6 @@ void RRTMGPRadiation::init_buffers(const ATMBufferManager &buffer_manager) Real* mem = reinterpret_cast(buffer_manager.get_memory()); -#ifdef RRTMGP_ENABLE_YAKL - // 1d arrays - m_buffer.mu0 = decltype(m_buffer.mu0)("mu0", mem, m_col_chunk_size); - mem += m_buffer.mu0.totElems(); - m_buffer.sfc_alb_dir_vis = decltype(m_buffer.sfc_alb_dir_vis)("sfc_alb_dir_vis", mem, m_col_chunk_size); - mem += m_buffer.sfc_alb_dir_vis.totElems(); - m_buffer.sfc_alb_dir_nir = decltype(m_buffer.sfc_alb_dir_nir)("sfc_alb_dir_nir", mem, m_col_chunk_size); - mem += m_buffer.sfc_alb_dir_nir.totElems(); - m_buffer.sfc_alb_dif_vis = decltype(m_buffer.sfc_alb_dif_vis)("sfc_alb_dif_vis", mem, m_col_chunk_size); - mem += m_buffer.sfc_alb_dif_vis.totElems(); - m_buffer.sfc_alb_dif_nir = decltype(m_buffer.sfc_alb_dif_nir)("sfc_alb_dif_nir", mem, m_col_chunk_size); - mem += m_buffer.sfc_alb_dif_nir.totElems(); - m_buffer.sfc_flux_dir_vis = decltype(m_buffer.sfc_flux_dir_vis)("sfc_flux_dir_vis", mem, m_col_chunk_size); - mem += m_buffer.sfc_flux_dir_vis.totElems(); - m_buffer.sfc_flux_dir_nir = decltype(m_buffer.sfc_flux_dir_nir)("sfc_flux_dir_nir", mem, m_col_chunk_size); - mem += m_buffer.sfc_flux_dir_nir.totElems(); - m_buffer.sfc_flux_dif_vis = decltype(m_buffer.sfc_flux_dif_vis)("sfc_flux_dif_vis", mem, m_col_chunk_size); - mem += m_buffer.sfc_flux_dif_vis.totElems(); - m_buffer.sfc_flux_dif_nir = decltype(m_buffer.sfc_flux_dif_nir)("sfc_flux_dif_nir", mem, m_col_chunk_size); - mem += m_buffer.sfc_flux_dif_nir.totElems(); - m_buffer.cosine_zenith = decltype(m_buffer.cosine_zenith)(mem, m_col_chunk_size); - mem += m_buffer.cosine_zenith.size(); - - // 2d arrays - m_buffer.p_lay = decltype(m_buffer.p_lay)("p_lay", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.p_lay.totElems(); - m_buffer.t_lay = decltype(m_buffer.t_lay)("t_lay", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.t_lay.totElems(); - m_buffer.z_del = decltype(m_buffer.z_del)("z_del", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.z_del.totElems(); - m_buffer.p_del = decltype(m_buffer.p_del)("p_del", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.p_del.totElems(); - m_buffer.qc = decltype(m_buffer.qc)("qc", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.qc.totElems(); - m_buffer.nc = decltype(m_buffer.nc)("nc", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.nc.totElems(); - m_buffer.qi = decltype(m_buffer.qi)("qi", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.qi.totElems(); - m_buffer.cldfrac_tot = decltype(m_buffer.cldfrac_tot)("cldfrac_tot", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.cldfrac_tot.totElems(); - m_buffer.eff_radius_qc = decltype(m_buffer.eff_radius_qc)("eff_radius_qc", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.eff_radius_qc.totElems(); - m_buffer.eff_radius_qi = decltype(m_buffer.eff_radius_qi)("eff_radius_qi", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.eff_radius_qi.totElems(); - m_buffer.tmp2d = decltype(m_buffer.tmp2d)("tmp2d", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.tmp2d.totElems(); - m_buffer.lwp = decltype(m_buffer.lwp)("lwp", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.lwp.totElems(); - m_buffer.iwp = decltype(m_buffer.iwp)("iwp", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.iwp.totElems(); - m_buffer.sw_heating = decltype(m_buffer.sw_heating)("sw_heating", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.sw_heating.totElems(); - m_buffer.lw_heating = decltype(m_buffer.lw_heating)("lw_heating", mem, m_col_chunk_size, m_nlay); - mem += m_buffer.lw_heating.totElems(); - m_buffer.p_lev = decltype(m_buffer.p_lev)("p_lev", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.p_lev.totElems(); - m_buffer.t_lev = decltype(m_buffer.t_lev)("t_lev", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.t_lev.totElems(); - m_buffer.d_tint = decltype(m_buffer.d_tint)(mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.d_tint.size(); - m_buffer.d_dz = decltype(m_buffer.d_dz )(mem, m_col_chunk_size, m_nlay); - mem += m_buffer.d_dz.size(); - // 3d arrays - m_buffer.sw_flux_up = decltype(m_buffer.sw_flux_up)("sw_flux_up", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.sw_flux_up.totElems(); - m_buffer.sw_flux_dn = decltype(m_buffer.sw_flux_dn)("sw_flux_dn", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.sw_flux_dn.totElems(); - m_buffer.sw_flux_dn_dir = decltype(m_buffer.sw_flux_dn_dir)("sw_flux_dn_dir", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.sw_flux_dn_dir.totElems(); - m_buffer.lw_flux_up = decltype(m_buffer.lw_flux_up)("lw_flux_up", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.lw_flux_up.totElems(); - m_buffer.lw_flux_dn = decltype(m_buffer.lw_flux_dn)("lw_flux_dn", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.lw_flux_dn.totElems(); - m_buffer.sw_clnclrsky_flux_up = decltype(m_buffer.sw_clnclrsky_flux_up)("sw_clnclrsky_flux_up", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.sw_clnclrsky_flux_up.totElems(); - m_buffer.sw_clnclrsky_flux_dn = decltype(m_buffer.sw_clnclrsky_flux_dn)("sw_clnclrsky_flux_dn", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.sw_clnclrsky_flux_dn.totElems(); - m_buffer.sw_clnclrsky_flux_dn_dir = decltype(m_buffer.sw_clnclrsky_flux_dn_dir)("sw_clnclrsky_flux_dn_dir", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.sw_clnclrsky_flux_dn_dir.totElems(); - m_buffer.sw_clrsky_flux_up = decltype(m_buffer.sw_clrsky_flux_up)("sw_clrsky_flux_up", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.sw_clrsky_flux_up.totElems(); - m_buffer.sw_clrsky_flux_dn = decltype(m_buffer.sw_clrsky_flux_dn)("sw_clrsky_flux_dn", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.sw_clrsky_flux_dn.totElems(); - m_buffer.sw_clrsky_flux_dn_dir = decltype(m_buffer.sw_clrsky_flux_dn_dir)("sw_clrsky_flux_dn_dir", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.sw_clrsky_flux_dn_dir.totElems(); - m_buffer.sw_clnsky_flux_up = decltype(m_buffer.sw_clnsky_flux_up)("sw_clnsky_flux_up", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.sw_clnsky_flux_up.totElems(); - m_buffer.sw_clnsky_flux_dn = decltype(m_buffer.sw_clnsky_flux_dn)("sw_clnsky_flux_dn", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.sw_clnsky_flux_dn.totElems(); - m_buffer.sw_clnsky_flux_dn_dir = decltype(m_buffer.sw_clnsky_flux_dn_dir)("sw_clnsky_flux_dn_dir", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.sw_clnsky_flux_dn_dir.totElems(); - m_buffer.lw_clnclrsky_flux_up = decltype(m_buffer.lw_clnclrsky_flux_up)("lw_clnclrsky_flux_up", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.lw_clnclrsky_flux_up.totElems(); - m_buffer.lw_clnclrsky_flux_dn = decltype(m_buffer.lw_clnclrsky_flux_dn)("lw_clnclrsky_flux_dn", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.lw_clnclrsky_flux_dn.totElems(); - m_buffer.lw_clrsky_flux_up = decltype(m_buffer.lw_clrsky_flux_up)("lw_clrsky_flux_up", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.lw_clrsky_flux_up.totElems(); - m_buffer.lw_clrsky_flux_dn = decltype(m_buffer.lw_clrsky_flux_dn)("lw_clrsky_flux_dn", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.lw_clrsky_flux_dn.totElems(); - m_buffer.lw_clnsky_flux_up = decltype(m_buffer.lw_clnsky_flux_up)("lw_clnsky_flux_up", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.lw_clnsky_flux_up.totElems(); - m_buffer.lw_clnsky_flux_dn = decltype(m_buffer.lw_clnsky_flux_dn)("lw_clnsky_flux_dn", mem, m_col_chunk_size, m_nlay+1); - mem += m_buffer.lw_clnsky_flux_dn.totElems(); - // 3d arrays with nswbands dimension (shortwave fluxes by band) - m_buffer.sw_bnd_flux_up = decltype(m_buffer.sw_bnd_flux_up)("sw_bnd_flux_up", mem, m_col_chunk_size, m_nlay+1, m_nswbands); - mem += m_buffer.sw_bnd_flux_up.totElems(); - m_buffer.sw_bnd_flux_dn = decltype(m_buffer.sw_bnd_flux_dn)("sw_bnd_flux_dn", mem, m_col_chunk_size, m_nlay+1, m_nswbands); - mem += m_buffer.sw_bnd_flux_dn.totElems(); - m_buffer.sw_bnd_flux_dir = decltype(m_buffer.sw_bnd_flux_dir)("sw_bnd_flux_dir", mem, m_col_chunk_size, m_nlay+1, m_nswbands); - mem += m_buffer.sw_bnd_flux_dir.totElems(); - m_buffer.sw_bnd_flux_dif = decltype(m_buffer.sw_bnd_flux_dif)("sw_bnd_flux_dif", mem, m_col_chunk_size, m_nlay+1, m_nswbands); - mem += m_buffer.sw_bnd_flux_dif.totElems(); - // 3d arrays with nlwbands dimension (longwave fluxes by band) - m_buffer.lw_bnd_flux_up = decltype(m_buffer.lw_bnd_flux_up)("lw_bnd_flux_up", mem, m_col_chunk_size, m_nlay+1, m_nlwbands); - mem += m_buffer.lw_bnd_flux_up.totElems(); - m_buffer.lw_bnd_flux_dn = decltype(m_buffer.lw_bnd_flux_dn)("lw_bnd_flux_dn", mem, m_col_chunk_size, m_nlay+1, m_nlwbands); - mem += m_buffer.lw_bnd_flux_dn.totElems(); - // 2d arrays with extra nswbands dimension (surface albedos by band) - m_buffer.sfc_alb_dir = decltype(m_buffer.sfc_alb_dir)("sfc_alb_dir", mem, m_col_chunk_size, m_nswbands); - mem += m_buffer.sfc_alb_dir.totElems(); - m_buffer.sfc_alb_dif = decltype(m_buffer.sfc_alb_dif)("sfc_alb_dif", mem, m_col_chunk_size, m_nswbands); - mem += m_buffer.sfc_alb_dif.totElems(); - // 3d arrays with extra band dimension (aerosol optics by band) - m_buffer.aero_tau_sw = decltype(m_buffer.aero_tau_sw)("aero_tau_sw", mem, m_col_chunk_size, m_nlay, m_nswbands); - mem += m_buffer.aero_tau_sw.totElems(); - m_buffer.aero_ssa_sw = decltype(m_buffer.aero_ssa_sw)("aero_ssa_sw", mem, m_col_chunk_size, m_nlay, m_nswbands); - mem += m_buffer.aero_ssa_sw.totElems(); - m_buffer.aero_g_sw = decltype(m_buffer.aero_g_sw )("aero_g_sw" , mem, m_col_chunk_size, m_nlay, m_nswbands); - mem += m_buffer.aero_g_sw.totElems(); - m_buffer.aero_tau_lw = decltype(m_buffer.aero_tau_lw)("aero_tau_lw", mem, m_col_chunk_size, m_nlay, m_nlwbands); - mem += m_buffer.aero_tau_lw.totElems(); - // 3d arrays with extra ngpt dimension (cloud optics by gpoint; primarily for debugging) - m_buffer.cld_tau_sw_gpt = decltype(m_buffer.cld_tau_sw_gpt)("cld_tau_sw_gpt", mem, m_col_chunk_size, m_nlay, m_nswgpts); - mem += m_buffer.cld_tau_sw_gpt.totElems(); - m_buffer.cld_tau_lw_gpt = decltype(m_buffer.cld_tau_lw_gpt)("cld_tau_lw_gpt", mem, m_col_chunk_size, m_nlay, m_nlwgpts); - mem += m_buffer.cld_tau_lw_gpt.totElems(); - m_buffer.cld_tau_sw_bnd = decltype(m_buffer.cld_tau_sw_bnd)("cld_tau_sw_bnd", mem, m_col_chunk_size, m_nlay, m_nswbands); - mem += m_buffer.cld_tau_sw_bnd.totElems(); - m_buffer.cld_tau_lw_bnd = decltype(m_buffer.cld_tau_lw_bnd)("cld_tau_lw_bnd", mem, m_col_chunk_size, m_nlay, m_nlwbands); - mem += m_buffer.cld_tau_lw_bnd.totElems(); -#endif - -#ifdef RRTMGP_ENABLE_KOKKOS // 1d arrays m_buffer.mu0_k = decltype(m_buffer.mu0_k)(mem, m_col_chunk_size); mem += m_buffer.mu0_k.size(); @@ -599,7 +448,6 @@ void RRTMGPRadiation::init_buffers(const ATMBufferManager &buffer_manager) mem += m_buffer.cld_tau_sw_bnd_k.size(); m_buffer.cld_tau_lw_bnd_k = decltype(m_buffer.cld_tau_lw_bnd_k)(mem, m_col_chunk_size, m_nlay, m_nlwbands); mem += m_buffer.cld_tau_lw_bnd_k.size(); -#endif size_t used_mem = (reinterpret_cast(mem) - buffer_manager.get_memory())*sizeof(Real); EKAT_REQUIRE_MSG(used_mem==requested_buffer_size_in_bytes(), "Error! Used memory != requested memory for RRTMGPRadiation."); @@ -639,19 +487,19 @@ void RRTMGPRadiation::initialize_impl(const RunType /* run_type */) { // Whether or not to do MCICA subcolumn sampling m_do_subcol_sampling = m_params.get("do_subcol_sampling",true); - // Initialize yakl + // Initialize kokkos init_kls(); // Names of active gases - auto gas_names_yakl_offset = string1dv(m_ngas); - m_gas_mol_weights = real1dk("gas_mol_weights",m_ngas); + auto gas_names_offset = string1dv(m_ngas); + m_gas_mol_weights = real1dk("gas_mol_weights",m_ngas); // the lookup function for getting the gas mol weights doesn't work on device auto gas_mol_w_host = Kokkos::create_mirror_view(m_gas_mol_weights); for (int igas = 0; igas < m_ngas; igas++) { const auto& gas_name = m_gas_names[igas]; - gas_names_yakl_offset[igas] = gas_name; - gas_mol_w_host[igas] = PC::get_gas_mol_weight(gas_name); + gas_names_offset[igas] = gas_name; + gas_mol_w_host[igas] = PC::get_gas_mol_weight(gas_name); } Kokkos::deep_copy(m_gas_mol_weights,gas_mol_w_host); @@ -661,19 +509,9 @@ void RRTMGPRadiation::initialize_impl(const RunType /* run_type */) { std::string coefficients_file_lw = m_params.get("rrtmgp_coefficients_file_lw"); std::string cloud_optics_file_sw = m_params.get("rrtmgp_cloud_optics_file_sw"); std::string cloud_optics_file_lw = m_params.get("rrtmgp_cloud_optics_file_lw"); -#ifdef RRTMGP_ENABLE_YAKL - m_gas_concs.init(gas_names_yakl_offset,m_col_chunk_size,m_nlay); - rrtmgp::rrtmgp_initialize( - m_gas_concs, - coefficients_file_sw, coefficients_file_lw, - cloud_optics_file_sw, cloud_optics_file_lw, - m_atm_logger - ); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS const double multiplier = m_params.get("pool_size_multiplier", 1.0); - m_gas_concs_k.init(gas_names_yakl_offset,m_col_chunk_size,m_nlay); + m_gas_concs_k.init(gas_names_offset,m_col_chunk_size,m_nlay); interface_t::rrtmgp_initialize( m_gas_concs_k, coefficients_file_sw, coefficients_file_lw, @@ -681,12 +519,6 @@ void RRTMGPRadiation::initialize_impl(const RunType /* run_type */) { m_atm_logger, multiplier ); - VALIDATE_KOKKOS(m_gas_concs, m_gas_concs_k); - VALIDATE_KOKKOS(rrtmgp::k_dist_sw, *interface_t::k_dist_sw_k); - VALIDATE_KOKKOS(rrtmgp::k_dist_lw, *interface_t::k_dist_lw_k); - VALIDATE_KOKKOS(rrtmgp::cloud_optics_sw, *interface_t::cloud_optics_sw_k); - VALIDATE_KOKKOS(rrtmgp::cloud_optics_lw, *interface_t::cloud_optics_lw_k); -#endif // Set property checks for fields in this process add_invariant_check(get_field_out("T_mid"),m_grid,100.0, 500.0,false); @@ -813,14 +645,8 @@ void RRTMGPRadiation::run_impl (const double dt) { // On each chunk, we internally "reset" the GasConcs object to subview the concs 3d array // with the correct ncol dimension. So let's keep a copy of the original (ref-counted) // array, to restore at the end inside the m_gast_concs object. -#ifdef RRTMGP_ENABLE_YAKL - auto gas_concs = m_gas_concs.concs; - auto orig_ncol = m_gas_concs.ncol; -#endif -#ifdef RRTMGP_ENABLE_KOKKOS auto gas_concs_k = m_gas_concs_k.concs; auto orig_ncol_k = m_gas_concs_k.ncol; -#endif // Compute orbital parameters; these are used both for computing // the solar zenith angle and also for computing total solar @@ -909,80 +735,6 @@ void RRTMGPRadiation::run_impl (const double dt) { ulrreal2dk d_tint = ulrreal2dk(m_buffer.d_tint.data(), m_col_chunk_size, m_nlay+1); ulrreal2dk d_dz = ulrreal2dk(m_buffer.d_dz.data(), m_col_chunk_size, m_nlay); auto d_mu0 = m_buffer.cosine_zenith; -#ifdef RRTMGP_ENABLE_YAKL - TIMED_INLINE_KERNEL(init_views, - // Create YAKL arrays. RRTMGP expects YAKL arrays with styleFortran, i.e., data has ncol - // as the fastest index. For this reason we must copy the data. - auto subview_1d = [&](const real1d v) -> real1d { - return real1d(v.label(),v.myData,ncol); - }; - auto subview_2d = [&](const real2d v) -> real2d { - return real2d(v.label(),v.myData,ncol,v.dimension[1]); - }; - auto subview_3d = [&](const real3d v) -> real3d { - return real3d(v.label(),v.myData,ncol,v.dimension[1],v.dimension[2]); - }; - - auto p_lay = subview_2d(m_buffer.p_lay); - auto t_lay = subview_2d(m_buffer.t_lay); - auto p_lev = subview_2d(m_buffer.p_lev); - auto z_del = subview_2d(m_buffer.z_del); - auto p_del = subview_2d(m_buffer.p_del); - auto t_lev = subview_2d(m_buffer.t_lev); - auto mu0 = subview_1d(m_buffer.mu0); - auto sfc_alb_dir = subview_2d(m_buffer.sfc_alb_dir); - auto sfc_alb_dif = subview_2d(m_buffer.sfc_alb_dif); - auto sfc_alb_dir_vis = subview_1d(m_buffer.sfc_alb_dir_vis); - auto sfc_alb_dir_nir = subview_1d(m_buffer.sfc_alb_dir_nir); - auto sfc_alb_dif_vis = subview_1d(m_buffer.sfc_alb_dif_vis); - auto sfc_alb_dif_nir = subview_1d(m_buffer.sfc_alb_dif_nir); - auto qc = subview_2d(m_buffer.qc); - auto nc = subview_2d(m_buffer.nc); - auto qi = subview_2d(m_buffer.qi); - auto cldfrac_tot = subview_2d(m_buffer.cldfrac_tot); - auto rel = subview_2d(m_buffer.eff_radius_qc); - auto rei = subview_2d(m_buffer.eff_radius_qi); - auto sw_flux_up = subview_2d(m_buffer.sw_flux_up); - auto sw_flux_dn = subview_2d(m_buffer.sw_flux_dn); - auto sw_flux_dn_dir = subview_2d(m_buffer.sw_flux_dn_dir); - auto lw_flux_up = subview_2d(m_buffer.lw_flux_up); - auto lw_flux_dn = subview_2d(m_buffer.lw_flux_dn); - auto sw_clnclrsky_flux_up = subview_2d(m_buffer.sw_clnclrsky_flux_up); - auto sw_clnclrsky_flux_dn = subview_2d(m_buffer.sw_clnclrsky_flux_dn); - auto sw_clnclrsky_flux_dn_dir = subview_2d(m_buffer.sw_clnclrsky_flux_dn_dir); - auto sw_clrsky_flux_up = subview_2d(m_buffer.sw_clrsky_flux_up); - auto sw_clrsky_flux_dn = subview_2d(m_buffer.sw_clrsky_flux_dn); - auto sw_clrsky_flux_dn_dir = subview_2d(m_buffer.sw_clrsky_flux_dn_dir); - auto sw_clnsky_flux_up = subview_2d(m_buffer.sw_clnsky_flux_up); - auto sw_clnsky_flux_dn = subview_2d(m_buffer.sw_clnsky_flux_dn); - auto sw_clnsky_flux_dn_dir = subview_2d(m_buffer.sw_clnsky_flux_dn_dir); - auto lw_clnclrsky_flux_up = subview_2d(m_buffer.lw_clnclrsky_flux_up); - auto lw_clnclrsky_flux_dn = subview_2d(m_buffer.lw_clnclrsky_flux_dn); - auto lw_clrsky_flux_up = subview_2d(m_buffer.lw_clrsky_flux_up); - auto lw_clrsky_flux_dn = subview_2d(m_buffer.lw_clrsky_flux_dn); - auto lw_clnsky_flux_up = subview_2d(m_buffer.lw_clnsky_flux_up); - auto lw_clnsky_flux_dn = subview_2d(m_buffer.lw_clnsky_flux_dn); - auto sw_bnd_flux_up = subview_3d(m_buffer.sw_bnd_flux_up); - auto sw_bnd_flux_dn = subview_3d(m_buffer.sw_bnd_flux_dn); - auto sw_bnd_flux_dir = subview_3d(m_buffer.sw_bnd_flux_dir); - auto sw_bnd_flux_dif = subview_3d(m_buffer.sw_bnd_flux_dif); - auto lw_bnd_flux_up = subview_3d(m_buffer.lw_bnd_flux_up); - auto lw_bnd_flux_dn = subview_3d(m_buffer.lw_bnd_flux_dn); - auto sfc_flux_dir_vis = subview_1d(m_buffer.sfc_flux_dir_vis); - auto sfc_flux_dir_nir = subview_1d(m_buffer.sfc_flux_dir_nir); - auto sfc_flux_dif_vis = subview_1d(m_buffer.sfc_flux_dif_vis); - auto sfc_flux_dif_nir = subview_1d(m_buffer.sfc_flux_dif_nir); - auto aero_tau_sw = subview_3d(m_buffer.aero_tau_sw); - auto aero_ssa_sw = subview_3d(m_buffer.aero_ssa_sw); - auto aero_g_sw = subview_3d(m_buffer.aero_g_sw); - auto aero_tau_lw = subview_3d(m_buffer.aero_tau_lw); - auto cld_tau_sw_bnd = subview_3d(m_buffer.cld_tau_sw_bnd); - auto cld_tau_lw_bnd = subview_3d(m_buffer.cld_tau_lw_bnd); - auto cld_tau_sw_gpt = subview_3d(m_buffer.cld_tau_sw_gpt); - auto cld_tau_lw_gpt = subview_3d(m_buffer.cld_tau_lw_gpt); - ); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS ConvertToRrtmgpSubview conv = {beg, ncol}; TIMED_INLINE_KERNEL(init_views, @@ -1046,19 +798,12 @@ void RRTMGPRadiation::run_impl (const double dt) { auto cld_tau_sw_gpt_k = conv.subview3d(m_buffer.cld_tau_sw_gpt_k); auto cld_tau_lw_gpt_k = conv.subview3d(m_buffer.cld_tau_lw_gpt_k); ); -#endif // Set gas concs to "view" only the first ncol columns -#ifdef RRTMGP_ENABLE_YAKL - m_gas_concs.ncol = ncol; - m_gas_concs.concs = subview_3d(gas_concs); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS m_gas_concs_k.ncol = ncol; m_gas_concs_k.concs = conv.subview3d(gas_concs_k); -#endif - // Copy data from the FieldManager to the YAKL arrays + // Copy data from the FieldManager to the Kokkos Views { // Determine the cosine zenith angle // NOTE: Since we are bridging to F90 arrays this must be done on HOST and then @@ -1113,60 +858,6 @@ void RRTMGPRadiation::run_impl (const double dt) { } team.team_barrier(); -#ifdef RRTMGP_ENABLE_YAKL - mu0(i+1) = d_mu0(i); - sfc_alb_dir_vis(i+1) = d_sfc_alb_dir_vis(icol); - sfc_alb_dir_nir(i+1) = d_sfc_alb_dir_nir(icol); - sfc_alb_dif_vis(i+1) = d_sfc_alb_dif_vis(icol); - sfc_alb_dif_nir(i+1) = d_sfc_alb_dif_nir(icol); - - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay), [&] (const int& k) { - p_lay(i+1,k+1) = d_pmid(icol,k); - t_lay(i+1,k+1) = d_tmid(icol,k); - z_del(i+1,k+1) = d_dz(i,k); - p_del(i+1,k+1) = d_pdel(icol,k); - qc(i+1,k+1) = d_qc(icol,k); - nc(i+1,k+1) = d_nc(icol,k); - qi(i+1,k+1) = d_qi(icol,k); - rel(i+1,k+1) = d_rel(icol,k); - rei(i+1,k+1) = d_rei(icol,k); - p_lev(i+1,k+1) = d_pint(icol,k); - t_lev(i+1,k+1) = d_tint(i,k); - }); - - p_lev(i+1,nlay+1) = d_pint(icol,nlay); - t_lev(i+1,nlay+1) = d_tint(i,nlay); - - // Note that RRTMGP expects ordering (col,lay,bnd) but the FM keeps things in (col,bnd,lay) order - if (do_aerosol_rad) { - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nswbands*nlay), [&] (const int&idx) { - auto b = idx / nlay; - auto k = idx % nlay; - aero_tau_sw(i+1,k+1,b+1) = d_aero_tau_sw(icol,b,k); - aero_ssa_sw(i+1,k+1,b+1) = d_aero_ssa_sw(icol,b,k); - aero_g_sw (i+1,k+1,b+1) = d_aero_g_sw (icol,b,k); - }); - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlwbands*nlay), [&] (const int&idx) { - auto b = idx / nlay; - auto k = idx % nlay; - aero_tau_lw(i+1,k+1,b+1) = d_aero_tau_lw(icol,b,k); - }); - } else { - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nswbands*nlay), [&] (const int&idx) { - auto b = idx / nlay; - auto k = idx % nlay; - aero_tau_sw(i+1,k+1,b+1) = 0; - aero_ssa_sw(i+1,k+1,b+1) = 0; - aero_g_sw (i+1,k+1,b+1) = 0; - }); - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlwbands*nlay), [&] (const int&idx) { - auto b = idx / nlay; - auto k = idx % nlay; - aero_tau_lw(i+1,k+1,b+1) = 0; - }); - } -#endif -#ifdef RRTMGP_ENABLE_KOKKOS #ifdef RRTMGP_LAYOUT_LEFT // Copy to layout left buffer views Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay), [&] (const int& k) { @@ -1221,24 +912,16 @@ void RRTMGPRadiation::run_impl (const double dt) { aero_tau_lw_k(i,k,b) = 0.0; }); } -#endif }); ); } Kokkos::fence(); -#ifdef RRTMGP_ENABLE_KOKKOS - COMPARE_ALL_WRAP(std::vector({aero_tau_sw, aero_ssa_sw, aero_g_sw, aero_tau_lw}), - std::vector({aero_tau_sw_k, aero_ssa_sw_k, aero_g_sw_k, aero_tau_lw_k})); -#endif // Populate GasConcs object to pass to RRTMGP driver // set_vmr requires the input array size to have the correct size, // and the last chunk may have less columns, so create a temp of // correct size that uses m_buffer.tmp2d's pointer -#ifdef RRTMGP_ENABLE_YAKL - real2d tmp2d = subview_2d(m_buffer.tmp2d); -#endif for (int igas = 0; igas < m_ngas; igas++) { auto name = m_gas_names[igas]; auto full_name = name + "_volume_mix_ratio"; @@ -1246,31 +929,10 @@ void RRTMGPRadiation::run_impl (const double dt) { // 'o3' is marked as 'Required' rather than 'Computed', so we need to get the proper field auto f = name=="o3" ? get_field_in(full_name) : get_field_out(full_name); auto d_vmr = f.get_view(); -#ifdef RRTMGP_ENABLE_KOKKOS auto tmp2d_k = conv.subview2d_impl(d_vmr, m_nlay); -#endif - -#ifdef RRTMGP_ENABLE_YAKL - // Copy to YAKL - const auto policy = ekat::ExeSpaceUtils::get_default_team_policy(ncol, m_nlay); - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const MemberType& team) { - const int i = team.league_rank(); - const int icol = i + beg; - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay), [&] (const int& k) { - tmp2d(i+1,k+1) = d_vmr(icol,k); // Note that for YAKL arrays i and k start with index 1 - }); - }); - Kokkos::fence(); -#endif // Populate GasConcs object -#ifdef RRTMGP_ENABLE_YAKL - m_gas_concs.set_vmr(name, tmp2d); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS - COMPARE_WRAP(tmp2d, tmp2d_k); m_gas_concs_k.set_vmr(name, tmp2d_k); -#endif } // Set layer cloud fraction. @@ -1284,36 +946,20 @@ void RRTMGPRadiation::run_impl (const double dt) { // If we *are* doing subcolumn sampling for MCICA, then keep cloud fraction as input // from cloud fraction parameterization, wherever that is computed. auto do_subcol_sampling = m_do_subcol_sampling; -#ifdef RRTMGP_ENABLE_YAKL - auto lwp = m_buffer.lwp; - auto iwp = m_buffer.iwp; -#endif -#ifdef RRTMGP_ENABLE_KOKKOS auto lwp_k = m_buffer.lwp_k; auto iwp_k = m_buffer.iwp_k; -#endif if (not do_subcol_sampling) { const auto policy = ekat::ExeSpaceUtils::get_default_team_policy(ncol, m_nlay); Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const MemberType& team) { const int i = team.league_rank(); const int icol = i + beg; Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay), [&] (const int& k) { -#ifdef RRTMGP_ENABLE_YAKL - if (d_cldfrac_tot(icol,k) > 0) { - cldfrac_tot(i+1,k+1) = 1; - } else { - cldfrac_tot(i+1,k+1) = 0; - } - d_cldfrac_rad(icol,k) = cldfrac_tot(i+1,k+1); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS if (d_cldfrac_tot(icol,k) > 0) { cldfrac_tot_k(i,k) = 1; } else { cldfrac_tot_k(i,k) = 0; } d_cldfrac_rad(icol,k) = cldfrac_tot_k(i,k); -#endif }); }); } else { @@ -1322,47 +968,24 @@ void RRTMGPRadiation::run_impl (const double dt) { const int i = team.league_rank(); const int icol = i + beg; Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay), [&] (const int& k) { -#ifdef RRTMGP_ENABLE_YAKL - cldfrac_tot(i+1,k+1) = d_cldfrac_tot(icol,k); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS cldfrac_tot_k(i,k) = d_cldfrac_tot(icol,k); -#endif d_cldfrac_rad(icol,k) = d_cldfrac_tot(icol,k); }); }); } Kokkos::fence(); -#ifdef RRTMGP_ENABLE_KOKKOS - COMPARE_WRAP(cldfrac_tot, cldfrac_tot_k); -#endif // Compute layer cloud mass (per unit area) -#ifdef RRTMGP_ENABLE_YAKL - rrtmgp::mixing_ratio_to_cloud_mass(qc, cldfrac_tot, p_del, lwp); - rrtmgp::mixing_ratio_to_cloud_mass(qi, cldfrac_tot, p_del, iwp); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS interface_t::mixing_ratio_to_cloud_mass(qc_k, cldfrac_tot_k, p_del_k, lwp_k); interface_t::mixing_ratio_to_cloud_mass(qi_k, cldfrac_tot_k, p_del_k, iwp_k); - COMPARE_ALL_WRAP(std::vector({lwp, iwp}), - std::vector({lwp_k, iwp_k})); -#endif // Convert to g/m2 (needed by RRTMGP) { const auto policy = ekat::ExeSpaceUtils::get_default_team_policy(ncol, m_nlay); Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const MemberType& team) { const int i = team.league_rank(); Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay), [&] (const int& k) { - // Note that for YAKL arrays i and k start with index 1 -#ifdef RRTMGP_ENABLE_YAKL - lwp(i+1,k+1) *= 1e3; - iwp(i+1,k+1) *= 1e3; -#endif -#ifdef RRTMGP_ENABLE_KOKKOS lwp_k(i,k) *= 1e3; iwp_k(i,k) *= 1e3; -#endif }); }); } @@ -1370,16 +993,6 @@ void RRTMGPRadiation::run_impl (const double dt) { // Compute band-by-band surface_albedos. This is needed since // the AD passes broadband albedos, but rrtmgp require band-by-band. -#ifdef RRTMGP_ENABLE_YAKL - TIMED_KERNEL( - rrtmgp::compute_band_by_band_surface_albedos( - ncol, nswbands, - sfc_alb_dir_vis, sfc_alb_dir_nir, - sfc_alb_dif_vis, sfc_alb_dif_nir, - sfc_alb_dir, sfc_alb_dif); - ); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS TIMED_KERNEL( interface_t::compute_band_by_band_surface_albedos( ncol, nswbands, @@ -1387,37 +1000,9 @@ void RRTMGPRadiation::run_impl (const double dt) { sfc_alb_dif_vis_k, sfc_alb_dif_nir_k, sfc_alb_dir_k, sfc_alb_dif_k); ); - COMPARE_ALL_WRAP(std::vector({sfc_alb_dir, sfc_alb_dif}), - std::vector({sfc_alb_dir_k, sfc_alb_dif_k})); -#endif // Compute cloud optical properties here? // Run RRTMGP driver -#ifdef RRTMGP_ENABLE_YAKL - TIMED_KERNEL( - rrtmgp::rrtmgp_main( - ncol, m_nlay, - p_lay, t_lay, p_lev, t_lev, - m_gas_concs, - sfc_alb_dir, sfc_alb_dif, mu0, - lwp, iwp, rel, rei, cldfrac_tot, - aero_tau_sw, aero_ssa_sw, aero_g_sw, aero_tau_lw, - cld_tau_sw_bnd, cld_tau_lw_bnd, - cld_tau_sw_gpt, cld_tau_lw_gpt, - sw_flux_up , sw_flux_dn , sw_flux_dn_dir , lw_flux_up , lw_flux_dn, - sw_clnclrsky_flux_up, sw_clnclrsky_flux_dn, sw_clnclrsky_flux_dn_dir, - sw_clrsky_flux_up, sw_clrsky_flux_dn, sw_clrsky_flux_dn_dir, - sw_clnsky_flux_up, sw_clnsky_flux_dn, sw_clnsky_flux_dn_dir, - lw_clnclrsky_flux_up, lw_clnclrsky_flux_dn, - lw_clrsky_flux_up, lw_clrsky_flux_dn, - lw_clnsky_flux_up, lw_clnsky_flux_dn, - sw_bnd_flux_up , sw_bnd_flux_dn , sw_bnd_flux_dir , lw_bnd_flux_up , lw_bnd_flux_dn, - eccf, m_atm_logger, - m_extra_clnclrsky_diag, m_extra_clnsky_diag - ); - ); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS TIMED_KERNEL( interface_t::rrtmgp_main( ncol, m_nlay, @@ -1440,53 +1025,8 @@ void RRTMGPRadiation::run_impl (const double dt) { m_extra_clnclrsky_diag, m_extra_clnsky_diag ); ); - COMPARE_ALL_WRAP(std::vector({ - sw_flux_up, sw_flux_dn, sw_flux_dn_dir, lw_flux_up, lw_flux_dn, - sw_clnclrsky_flux_up, sw_clnclrsky_flux_dn, sw_clnclrsky_flux_dn_dir, - sw_clrsky_flux_up, sw_clrsky_flux_dn, sw_clrsky_flux_dn_dir, - sw_clnsky_flux_up, sw_clnsky_flux_dn, sw_clnsky_flux_dn_dir, - lw_clnclrsky_flux_up, lw_clnclrsky_flux_dn, - lw_clrsky_flux_up, lw_clrsky_flux_dn, - lw_clnsky_flux_up, lw_clnsky_flux_dn}), - std::vector({ - sw_flux_up_k, sw_flux_dn_k, sw_flux_dn_dir_k, lw_flux_up_k, lw_flux_dn_k, - sw_clnclrsky_flux_up_k, sw_clnclrsky_flux_dn_k, sw_clnclrsky_flux_dn_dir_k, - sw_clrsky_flux_up_k, sw_clrsky_flux_dn_k, sw_clrsky_flux_dn_dir_k, - sw_clnsky_flux_up_k, sw_clnsky_flux_dn_k, sw_clnsky_flux_dn_dir_k, - lw_clnclrsky_flux_up_k, lw_clnclrsky_flux_dn_k, - lw_clrsky_flux_up_k, lw_clrsky_flux_dn_k, - lw_clnsky_flux_up_k, lw_clnsky_flux_dn_k})); - COMPARE_ALL_WRAP(std::vector({sw_bnd_flux_up, sw_bnd_flux_dn, sw_bnd_flux_dir, lw_bnd_flux_up, lw_bnd_flux_dn}), - std::vector({sw_bnd_flux_up_k, sw_bnd_flux_dn_k, sw_bnd_flux_dir_k, lw_bnd_flux_up_k, lw_bnd_flux_dn_k})); -#endif // Update heating tendency -#ifdef RRTMGP_ENABLE_YAKL - TIMED_INLINE_KERNEL(heating_tendency, - auto sw_heating = m_buffer.sw_heating; - auto lw_heating = m_buffer.lw_heating; - rrtmgp::compute_heating_rate( - sw_flux_up, sw_flux_dn, p_del, sw_heating - ); - rrtmgp::compute_heating_rate( - lw_flux_up, lw_flux_dn, p_del, lw_heating - ); - { - const auto policy = ekat::ExeSpaceUtils::get_default_team_policy(ncol, m_nlay); - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const MemberType& team) { - const int idx = team.league_rank(); - const int icol = idx+beg; - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay), [&] (const int& ilay) { - // Combine SW and LW heating into a net heating tendency; use d_rad_heating_pdel temporarily - // Note that for YAKL arrays i and k start with index 1 - d_rad_heating_pdel(icol,ilay) = sw_heating(idx+1,ilay+1) + lw_heating(idx+1,ilay+1); - }); - }); - } - Kokkos::fence(); - ); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS TIMED_INLINE_KERNEL(heating_tendency, auto sw_heating_k = m_buffer.sw_heating_k; auto lw_heating_k = m_buffer.lw_heating_k; @@ -1503,48 +1043,20 @@ void RRTMGPRadiation::run_impl (const double dt) { const int icol = idx+beg; Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay), [&] (const int& ilay) { // Combine SW and LW heating into a net heating tendency; use d_rad_heating_pdel temporarily - // Note that for YAKL arrays i and k start with index 1 d_rad_heating_pdel(icol,ilay) = sw_heating_k(idx,ilay) + lw_heating_k(idx,ilay); }); }); } Kokkos::fence(); ); - COMPARE_ALL_WRAP(std::vector({sw_heating, lw_heating}), - std::vector({sw_heating_k, lw_heating_k})); -#endif // Index to surface (bottom of model); used to get surface fluxes below -#ifdef RRTMGP_ENABLE_YAKL - const int kbot = nlay+1; - - TIMED_KERNEL( - // Compute diffuse flux as difference between total and direct - Kokkos::parallel_for(Kokkos::RangePolicy(0,nswbands*(nlay+1)*ncol), - KOKKOS_LAMBDA (const int idx) { - // CAREFUL: these are YAKL arrays, with "LayoutLeft". So make the indices stride accordingly, and add 1. - const int ibnd = (idx / ncol) / (nlay+1) + 1; - const int ilev = (idx / ncol) % (nlay+1) + 1; - const int icol = idx % ncol + 1; - sw_bnd_flux_dif(icol,ilev,ibnd) = sw_bnd_flux_dn(icol,ilev,ibnd) - sw_bnd_flux_dir(icol,ilev,ibnd); - }); - // Compute surface fluxes - rrtmgp::compute_broadband_surface_fluxes( - ncol, kbot, nswbands, - sw_bnd_flux_dir, sw_bnd_flux_dif, - sfc_flux_dir_vis, sfc_flux_dir_nir, - sfc_flux_dif_vis, sfc_flux_dif_nir - ); - ); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS const int kbot_k = nlay; TIMED_KERNEL( // Compute diffuse flux as difference between total and direct Kokkos::parallel_for(Kokkos::RangePolicy(0,nswbands*(nlay+1)*ncol), KOKKOS_LAMBDA (const int idx) { - // CAREFUL: these are YAKL arrays, with "LayoutLeft". So make the indices stride accordingly, and add 1. const int ibnd = (idx / ncol) / (nlay+1); const int ilev = (idx / ncol) % (nlay+1); const int icol = idx % ncol; @@ -1558,30 +1070,8 @@ void RRTMGPRadiation::run_impl (const double dt) { sfc_flux_dif_vis_k, sfc_flux_dif_nir_k ); ); - COMPARE_ALL_WRAP(std::vector({sfc_flux_dir_vis, sfc_flux_dir_nir, sfc_flux_dif_vis, sfc_flux_dif_nir}), - std::vector({sfc_flux_dir_vis_k, sfc_flux_dir_nir_k, sfc_flux_dif_vis_k, sfc_flux_dif_nir_k})); -#endif // Compute diagnostic total cloud area (vertically-projected cloud cover) -#ifdef RRTMGP_ENABLE_YAKL - TIMED_KERNEL( - real1d cldlow ("cldlow", d_cldlow.data() + m_col_chunk_beg[ic], ncol); - real1d cldmed ("cldmed", d_cldmed.data() + m_col_chunk_beg[ic], ncol); - real1d cldhgh ("cldhgh", d_cldhgh.data() + m_col_chunk_beg[ic], ncol); - real1d cldtot ("cldtot", d_cldtot.data() + m_col_chunk_beg[ic], ncol); - // NOTE: limits for low, mid, and high clouds are mostly taken from EAM F90 source, with the - // exception that I removed the restriction on low clouds to be above (numerically lower pressures) - // 1200 hPa, and on high clouds to be below (numerically high pressures) 50 hPa. This probably - // does not matter in practice, as clouds probably should not be produced above 50 hPa and we - // should not be encountering surface pressure above 1200 hPa, but in the event that things go off - // the rails we might want to look at these still. - rrtmgp::compute_cloud_area(ncol, nlay, nlwgpts, 700e2, std::numeric_limits::max(), p_lay, cld_tau_lw_gpt, cldlow); - rrtmgp::compute_cloud_area(ncol, nlay, nlwgpts, 400e2, 700e2, p_lay, cld_tau_lw_gpt, cldmed); - rrtmgp::compute_cloud_area(ncol, nlay, nlwgpts, 0, 400e2, p_lay, cld_tau_lw_gpt, cldhgh); - rrtmgp::compute_cloud_area(ncol, nlay, nlwgpts, 0, std::numeric_limits::max(), p_lay, cld_tau_lw_gpt, cldtot); - ); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS TIMED_KERNEL( real1dk cldlow_k (d_cldlow.data() + m_col_chunk_beg[ic], ncol); real1dk cldmed_k (d_cldmed.data() + m_col_chunk_beg[ic], ncol); @@ -1598,37 +1088,8 @@ void RRTMGPRadiation::run_impl (const double dt) { interface_t::compute_cloud_area(ncol, nlay, nlwgpts, 0, 400e2, p_lay_k, cld_tau_lw_gpt_k, cldhgh_k); interface_t::compute_cloud_area(ncol, nlay, nlwgpts, 0, std::numeric_limits::max(), p_lay_k, cld_tau_lw_gpt_k, cldtot_k); ); - COMPARE_ALL_WRAP(std::vector({cldlow, cldmed, cldhgh, cldtot}), - std::vector({cldlow_k, cldmed_k, cldhgh_k, cldtot_k})); -#endif // Compute cloud-top diagnostics following AeroCOM recommendation -#ifdef RRTMGP_ENABLE_YAKL - TIMED_INLINE_KERNEL(cloud_top, - - // Get visible 0.67 micron band for COSP - auto idx_067 = rrtmgp::get_wavelength_index_sw(0.67e-6); - // Get IR 10.5 micron band for COSP - auto idx_105 = rrtmgp::get_wavelength_index_lw(10.5e-6); - - // Compute cloud-top diagnostics following AeroCom recommendation - real1d T_mid_at_cldtop ("T_mid_at_cldtop", d_T_mid_at_cldtop.data() + m_col_chunk_beg[ic], ncol); - real1d p_mid_at_cldtop ("p_mid_at_cldtop", d_p_mid_at_cldtop.data() + m_col_chunk_beg[ic], ncol); - real1d cldfrac_ice_at_cldtop ("cldfrac_ice_at_cldtop", d_cldfrac_ice_at_cldtop.data() + m_col_chunk_beg[ic], ncol); - real1d cldfrac_liq_at_cldtop ("cldfrac_liq_at_cldtop", d_cldfrac_liq_at_cldtop.data() + m_col_chunk_beg[ic], ncol); - real1d cldfrac_tot_at_cldtop ("cldfrac_tot_at_cldtop", d_cldfrac_tot_at_cldtop.data() + m_col_chunk_beg[ic], ncol); - real1d cdnc_at_cldtop ("cdnc_at_cldtop", d_cdnc_at_cldtop.data() + m_col_chunk_beg[ic], ncol); - real1d eff_radius_qc_at_cldtop ("eff_radius_qc_at_cldtop", d_eff_radius_qc_at_cldtop.data() + m_col_chunk_beg[ic], ncol); - real1d eff_radius_qi_at_cldtop ("eff_radius_qi_at_cldtop", d_eff_radius_qi_at_cldtop.data() + m_col_chunk_beg[ic], ncol); - - rrtmgp::compute_aerocom_cloudtop( - ncol, nlay, t_lay, p_lay, p_del, z_del, qc, qi, rel, rei, cldfrac_tot, - nc, T_mid_at_cldtop, p_mid_at_cldtop, cldfrac_ice_at_cldtop, - cldfrac_liq_at_cldtop, cldfrac_tot_at_cldtop, cdnc_at_cldtop, - eff_radius_qc_at_cldtop, eff_radius_qi_at_cldtop); - ); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS TIMED_INLINE_KERNEL(cloud_top, // Get visible 0.67 micron band for COSP auto idx_067_k = interface_t::get_wavelength_index_sw_k(0.67e-6); @@ -1650,65 +1111,9 @@ void RRTMGPRadiation::run_impl (const double dt) { cldfrac_liq_at_cldtop_k, cldfrac_tot_at_cldtop_k, cdnc_at_cldtop_k, eff_radius_qc_at_cldtop_k, eff_radius_qi_at_cldtop_k); ); - COMPARE_ALL_WRAP(std::vector({ - T_mid_at_cldtop, p_mid_at_cldtop, cldfrac_ice_at_cldtop, - cldfrac_liq_at_cldtop, cldfrac_tot_at_cldtop, cdnc_at_cldtop, - eff_radius_qc_at_cldtop, eff_radius_qi_at_cldtop}), - std::vector({ - T_mid_at_cldtop_k, p_mid_at_cldtop_k, cldfrac_ice_at_cldtop_k, - cldfrac_liq_at_cldtop_k, cldfrac_tot_at_cldtop_k, cdnc_at_cldtop_k, - eff_radius_qc_at_cldtop_k, eff_radius_qi_at_cldtop_k})); -#endif // Copy output data back to FieldManager const auto policy = ekat::ExeSpaceUtils::get_default_team_policy(ncol, m_nlay); -#ifdef RRTMGP_ENABLE_YAKL - TIMED_KERNEL( - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const MemberType& team) { - const int i = team.league_rank(); - const int icol = i + beg; - d_sfc_flux_dir_nir(icol) = sfc_flux_dir_nir(i+1); - d_sfc_flux_dir_vis(icol) = sfc_flux_dir_vis(i+1); - d_sfc_flux_dif_nir(icol) = sfc_flux_dif_nir(i+1); - d_sfc_flux_dif_vis(icol) = sfc_flux_dif_vis(i+1); - d_sfc_flux_sw_net(icol) = sw_flux_dn(i+1,kbot) - sw_flux_up(i+1,kbot); - d_sfc_flux_lw_dn(icol) = lw_flux_dn(i+1,kbot); - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay+1), [&] (const int& k) { - d_sw_flux_up(icol,k) = sw_flux_up(i+1,k+1); - d_sw_flux_dn(icol,k) = sw_flux_dn(i+1,k+1); - d_sw_flux_dn_dir(icol,k) = sw_flux_dn_dir(i+1,k+1); - d_lw_flux_up(icol,k) = lw_flux_up(i+1,k+1); - d_lw_flux_dn(icol,k) = lw_flux_dn(i+1,k+1); - d_sw_clnclrsky_flux_up(icol,k) = sw_clnclrsky_flux_up(i+1,k+1); - d_sw_clnclrsky_flux_dn(icol,k) = sw_clnclrsky_flux_dn(i+1,k+1); - d_sw_clnclrsky_flux_dn_dir(icol,k) = sw_clnclrsky_flux_dn_dir(i+1,k+1); - d_sw_clrsky_flux_up(icol,k) = sw_clrsky_flux_up(i+1,k+1); - d_sw_clrsky_flux_dn(icol,k) = sw_clrsky_flux_dn(i+1,k+1); - d_sw_clrsky_flux_dn_dir(icol,k) = sw_clrsky_flux_dn_dir(i+1,k+1); - d_sw_clnsky_flux_up(icol,k) = sw_clnsky_flux_up(i+1,k+1); - d_sw_clnsky_flux_dn(icol,k) = sw_clnsky_flux_dn(i+1,k+1); - d_sw_clnsky_flux_dn_dir(icol,k) = sw_clnsky_flux_dn_dir(i+1,k+1); - d_lw_clnclrsky_flux_up(icol,k) = lw_clnclrsky_flux_up(i+1,k+1); - d_lw_clnclrsky_flux_dn(icol,k) = lw_clnclrsky_flux_dn(i+1,k+1); - d_lw_clrsky_flux_up(icol,k) = lw_clrsky_flux_up(i+1,k+1); - d_lw_clrsky_flux_dn(icol,k) = lw_clrsky_flux_dn(i+1,k+1); - d_lw_clnsky_flux_up(icol,k) = lw_clnsky_flux_up(i+1,k+1); - d_lw_clnsky_flux_dn(icol,k) = lw_clnsky_flux_dn(i+1,k+1); - }); - // Extract optical properties for COSP - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay), [&] (const int& k) { - d_dtau067(icol,k) = cld_tau_sw_bnd(i+1,k+1,idx_067); - d_dtau105(icol,k) = cld_tau_lw_bnd(i+1,k+1,idx_105); - }); - if (d_sw_clrsky_flux_dn(icol,0) > 0) { - d_sunlit(icol) = 1.0; - } else { - d_sunlit(icol) = 0.0; - } - }); - ); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS TIMED_KERNEL( Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const MemberType& team) { const int i = team.league_rank(); @@ -1752,23 +1157,11 @@ void RRTMGPRadiation::run_impl (const double dt) { } }); ); -#ifdef RRTMGP_ENABLE_YAKL - // Sync back to gas_concs_k - real3dk temp(gas_concs_k, std::make_pair(0, ncol), Kokkos::ALL, Kokkos::ALL); - Kokkos::deep_copy(temp, m_gas_concs_k.concs); -#endif -#endif } // loop over chunk // Restore the refCounted array. -#ifdef RRTMGP_ENABLE_YAKL - m_gas_concs.concs = gas_concs; - m_gas_concs.ncol = orig_ncol; -#endif -#ifdef RRTMGP_ENABLE_KOKKOS m_gas_concs_k.concs = gas_concs_k; m_gas_concs_k.ncol = orig_ncol_k; -#endif } // update_rad // Apply temperature tendency; if we updated radiation this timestep, then d_rad_heating_pdel should @@ -1822,16 +1215,10 @@ void RRTMGPRadiation::run_impl (const double dt) { // ========================================================================================= void RRTMGPRadiation::finalize_impl () { -#ifdef RRTMGP_ENABLE_YAKL - m_gas_concs.reset(); - rrtmgp::rrtmgp_finalize(); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS m_gas_concs_k.reset(); // Finalize the interface, passing a bool for rank 0 // to print info about memory stats on that rank interface_t::rrtmgp_finalize(m_comm.am_i_root()); -#endif finalize_kls(); } diff --git a/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.hpp b/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.hpp index c7c11c9df841..26f4580887d3 100644 --- a/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.hpp +++ b/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.hpp @@ -46,9 +46,7 @@ class RRTMGPRadiation : public AtmosphereProcess { using lrreal2dk = typename KT::template view_2d; using ulrreal2dk = Unmanaged; -#ifdef RRTMGP_ENABLE_KOKKOS using interface_t = rrtmgp::rrtmgp_interface; -#endif // Constructors RRTMGPRadiation (const ekat::Comm& comm, const ekat::ParameterList& params); @@ -115,12 +113,7 @@ class RRTMGPRadiation : public AtmosphereProcess { int m_ngas; std::vector m_gas_names; real1dk m_gas_mol_weights; -#ifdef RRTMGP_ENABLE_YAKL - GasConcs m_gas_concs; -#endif -#ifdef RRTMGP_ENABLE_KOKKOS GasConcsK m_gas_concs_k; -#endif // Prescribed greenhouse gas surface concentrations in moles / moles air Real m_co2vmr; @@ -152,18 +145,6 @@ class RRTMGPRadiation : public AtmosphereProcess { // 1d size (ncol) ureal1dk cosine_zenith; -#ifdef RRTMGP_ENABLE_YAKL - real1d mu0; - real1d sfc_alb_dir_vis; - real1d sfc_alb_dir_nir; - real1d sfc_alb_dif_vis; - real1d sfc_alb_dif_nir; - real1d sfc_flux_dir_vis; - real1d sfc_flux_dir_nir; - real1d sfc_flux_dif_vis; - real1d sfc_flux_dif_nir; -#endif -#ifdef RRTMGP_ENABLE_KOKKOS ureal1dk mu0_k; ureal1dk sfc_alb_dir_vis_k; ureal1dk sfc_alb_dir_nir_k; @@ -173,28 +154,9 @@ class RRTMGPRadiation : public AtmosphereProcess { ureal1dk sfc_flux_dir_nir_k; ureal1dk sfc_flux_dif_vis_k; ureal1dk sfc_flux_dif_nir_k; -#endif // 2d size (ncol, nlay) ureal2dk d_dz; -#ifdef RRTMGP_ENABLE_YAKL - real2d p_lay; - real2d t_lay; - real2d z_del; - real2d p_del; - real2d qc; - real2d nc; - real2d qi; - real2d cldfrac_tot; - real2d eff_radius_qc; - real2d eff_radius_qi; - real2d tmp2d; - real2d lwp; - real2d iwp; - real2d sw_heating; - real2d lw_heating; -#endif -#ifdef RRTMGP_ENABLE_KOKKOS ureal2dk p_lay_k; ureal2dk t_lay_k; ureal2dk z_del_k; @@ -210,35 +172,9 @@ class RRTMGPRadiation : public AtmosphereProcess { ureal2dk iwp_k; ureal2dk sw_heating_k; ureal2dk lw_heating_k; -#endif // 2d size (ncol, nlay+1) ureal2dk d_tint; -#ifdef RRTMGP_ENABLE_YAKL - real2d p_lev; - real2d t_lev; - real2d sw_flux_up; - real2d sw_flux_dn; - real2d sw_flux_dn_dir; - real2d lw_flux_up; - real2d lw_flux_dn; - real2d sw_clnclrsky_flux_up; - real2d sw_clnclrsky_flux_dn; - real2d sw_clnclrsky_flux_dn_dir; - real2d sw_clrsky_flux_up; - real2d sw_clrsky_flux_dn; - real2d sw_clrsky_flux_dn_dir; - real2d sw_clnsky_flux_up; - real2d sw_clnsky_flux_dn; - real2d sw_clnsky_flux_dn_dir; - real2d lw_clnclrsky_flux_up; - real2d lw_clnclrsky_flux_dn; - real2d lw_clrsky_flux_up; - real2d lw_clrsky_flux_dn; - real2d lw_clnsky_flux_up; - real2d lw_clnsky_flux_dn; -#endif -#ifdef RRTMGP_ENABLE_KOKKOS ureal2dk p_lev_k; ureal2dk t_lev_k; ureal2dk sw_flux_up_k; @@ -261,76 +197,34 @@ class RRTMGPRadiation : public AtmosphereProcess { ureal2dk lw_clrsky_flux_dn_k; ureal2dk lw_clnsky_flux_up_k; ureal2dk lw_clnsky_flux_dn_k; -#endif // 3d size (ncol, nlay+1, nswbands) -#ifdef RRTMGP_ENABLE_YAKL - real3d sw_bnd_flux_up; - real3d sw_bnd_flux_dn; - real3d sw_bnd_flux_dir; - real3d sw_bnd_flux_dif; -#endif -#ifdef RRTMGP_ENABLE_KOKKOS ureal3dk sw_bnd_flux_up_k; ureal3dk sw_bnd_flux_dn_k; ureal3dk sw_bnd_flux_dir_k; ureal3dk sw_bnd_flux_dif_k; -#endif // 3d size (ncol, nlay+1, nlwbands) -#ifdef RRTMGP_ENABLE_YAKL - real3d lw_bnd_flux_up; - real3d lw_bnd_flux_dn; -#endif -#ifdef RRTMGP_ENABLE_KOKKOS ureal3dk lw_bnd_flux_up_k; ureal3dk lw_bnd_flux_dn_k; -#endif // 2d size (ncol, nswbands) -#ifdef RRTMGP_ENABLE_YAKL - real2d sfc_alb_dir; - real2d sfc_alb_dif; -#endif -#ifdef RRTMGP_ENABLE_KOKKOS ureal2dk sfc_alb_dir_k; ureal2dk sfc_alb_dif_k; -#endif // 3d size (ncol, nlay, n[sw,lw]bands) -#ifdef RRTMGP_ENABLE_YAKL - real3d aero_tau_sw; - real3d aero_ssa_sw; - real3d aero_g_sw; - real3d aero_tau_lw; -#endif -#ifdef RRTMGP_ENABLE_KOKKOS ureal3dk aero_tau_sw_k; ureal3dk aero_ssa_sw_k; ureal3dk aero_g_sw_k; ureal3dk aero_tau_lw_k; -#endif // 3d size (ncol, nlay, n[sw,lw]bnds) -#ifdef RRTMGP_ENABLE_YAKL - real3d cld_tau_sw_bnd; - real3d cld_tau_lw_bnd; -#endif -#ifdef RRTMGP_ENABLE_KOKKOS ureal3dk cld_tau_sw_bnd_k; ureal3dk cld_tau_lw_bnd_k; -#endif // 3d size (ncol, nlay, n[sw,lw]gpts) -#ifdef RRTMGP_ENABLE_YAKL - real3d cld_tau_sw_gpt; - real3d cld_tau_lw_gpt; -#endif -#ifdef RRTMGP_ENABLE_KOKKOS ureal3dk cld_tau_sw_gpt_k; ureal3dk cld_tau_lw_gpt_k; -#endif - }; protected: diff --git a/components/eamxx/src/physics/rrtmgp/rrtmgp_test_utils.cpp b/components/eamxx/src/physics/rrtmgp/rrtmgp_test_utils.cpp index 65cc99e09cc3..d78cb4ed115a 100644 --- a/components/eamxx/src/physics/rrtmgp/rrtmgp_test_utils.cpp +++ b/components/eamxx/src/physics/rrtmgp/rrtmgp_test_utils.cpp @@ -1,7 +1,4 @@ #include "physics/rrtmgp/rrtmgp_test_utils.hpp" -#ifdef RRTMGP_ENABLE_YAKL -#include "YAKL_netcdf.h" -#endif #include #include @@ -17,122 +14,4 @@ bool file_exists(const char *filename) { } } -#ifdef RRTMGP_ENABLE_YAKL -using yakl::fortran::parallel_for; -using yakl::fortran::SimpleBounds; -using yakl::intrinsics::mod; -using yakl::intrinsics::merge; - -bool all_close(real2d &arr1, real2d &arr2, double tolerance) { - int nx = arr1.dimension[0]; - int ny = arr2.dimension[1]; - auto arr1_h = arr1.createHostCopy(); - auto arr2_h = arr2.createHostCopy(); - for (int i=1; i tolerance || std::isnan(arr1_h(i,j) - arr2_h(i,j))) { - printf("arr1 = %f, arr2 = %f at %i,%i\n", arr1_h(i,j), arr2_h(i,j), i, j); - return false; - } - } - } - return true; -} - -void dummy_atmos( - std::string inputfile, - int ncol, real2d &p_lay, real2d &t_lay, - real1d &sfc_alb_dir_vis, real1d &sfc_alb_dir_nir, - real1d &sfc_alb_dif_vis, real1d &sfc_alb_dif_nir, - real1d &mu0, - real2d &lwp, real2d &iwp, real2d &rel, real2d &rei, real2d &cld) { - - // Setup boundary conditions, solar zenith angle, etc - // NOTE: this stuff would come from the model in a real run - - // Ocean-ish values for surface albedos, just for example - memset(sfc_alb_dir_vis , 0.06_wp ); - memset(sfc_alb_dir_nir , 0.06_wp ); - memset(sfc_alb_dif_vis , 0.06_wp ); - memset(sfc_alb_dif_nir , 0.06_wp ); - - // Pick a solar zenith angle; this should come from the model - memset(mu0, 0.86_wp ); - - // Get dummy cloud PHYSICAL properties. Note that this function call - // needs the CloudOptics object only because it uses the min and max - // valid values from the lookup tables for liquid and ice water path to - // create a dummy atmosphere. - dummy_clouds(scream::rrtmgp::cloud_optics_sw, p_lay, t_lay, lwp, iwp, rel, rei, cld); -} - -void dummy_clouds( - CloudOptics &cloud_optics, real2d &p_lay, real2d &t_lay, - real2d &lwp, real2d &iwp, real2d &rel, real2d &rei, real2d &cloud_mask) { - - // Problem sizes - int ncol = t_lay.dimension[0]; - int nlay = t_lay.dimension[1]; - - // Generate some fake liquid and ice water data. We pick values to be midway between - // the min and max of the valid lookup table values for effective radii - real rel_val = 0.5 * (cloud_optics.get_min_radius_liq() + cloud_optics.get_max_radius_liq()); - real rei_val = 0.5 * (cloud_optics.get_min_radius_ice() + cloud_optics.get_max_radius_ice()); - - // Restrict clouds to troposphere (> 100 hPa = 100*100 Pa) and not very close to the ground (< 900 hPa), and - // put them in 2/3 of the columns since that's roughly the total cloudiness of earth. - // Set sane values for liquid and ice water path. - // NOTE: these "sane" values are in g/m2! - parallel_for( SimpleBounds<2>(nlay,ncol) , YAKL_LAMBDA (int ilay, int icol) { - cloud_mask(icol,ilay) = p_lay(icol,ilay) > 100._wp * 100._wp && p_lay(icol,ilay) < 900._wp * 100._wp && mod(icol, 3) != 0; - // Ice and liquid will overlap in a few layers - lwp(icol,ilay) = merge(10._wp, 0._wp, cloud_mask(icol,ilay) && t_lay(icol,ilay) > 263._wp); - iwp(icol,ilay) = merge(10._wp, 0._wp, cloud_mask(icol,ilay) && t_lay(icol,ilay) < 273._wp); - rel(icol,ilay) = merge(rel_val, 0._wp, lwp(icol,ilay) > 0._wp); - rei(icol,ilay) = merge(rei_val, 0._wp, iwp(icol,ilay) > 0._wp); - }); -} - -void read_fluxes( - std::string inputfile, - real2d &sw_flux_up, real2d &sw_flux_dn, real2d &sw_flux_dir, - real2d &lw_flux_up, real2d &lw_flux_dn) { - - // Initialize netcdf reader - yakl::SimpleNetCDF io; - io.open(inputfile, NC_NOWRITE); - - // Initialize arrays to hold fluxes - int nlev = io.getDimSize("lev"); - int ncol = io.getDimSize("col_flx"); - sw_flux_up = real2d("sw_flux_up" , ncol, nlev); - sw_flux_dn = real2d("sw_flux_dn" , ncol, nlev); - sw_flux_dir = real2d("sw_flux_dir", ncol, nlev); - lw_flux_up = real2d("lw_flux_up" , ncol, nlev); - lw_flux_dn = real2d("lw_flux_dn" , ncol, nlev); - - // Read data - io.read(sw_flux_up, "sw_flux_up" ); - io.read(sw_flux_dn, "sw_flux_dn" ); - io.read(sw_flux_dir, "sw_flux_dir"); - io.read(lw_flux_up, "lw_flux_up" ); - io.read(lw_flux_dn, "lw_flux_dn" ); -} - -void write_fluxes( - std::string outputfile, - real2d &sw_flux_up, real2d &sw_flux_dn, real2d &sw_flux_dir, - real2d &lw_flux_up, real2d &lw_flux_dn) { - - yakl::SimpleNetCDF io; - io.create(outputfile); - io.write(sw_flux_up , "sw_flux_up" , {"col_flx","lev"}); - io.write(sw_flux_dn , "sw_flux_dn" , {"col_flx","lev"}); - io.write(sw_flux_dir, "sw_flux_dir", {"col_flx","lev"}); - io.write(lw_flux_up , "lw_flux_up" , {"col_flx","lev"}); - io.write(lw_flux_dn , "lw_flux_dn" , {"col_flx","lev"}); - io.close(); -} -#endif - } // namespace rrtmgp diff --git a/components/eamxx/src/physics/rrtmgp/rrtmgp_test_utils.hpp b/components/eamxx/src/physics/rrtmgp/rrtmgp_test_utils.hpp index 32bdfd9e44d2..2a4ad6195bef 100644 --- a/components/eamxx/src/physics/rrtmgp/rrtmgp_test_utils.hpp +++ b/components/eamxx/src/physics/rrtmgp/rrtmgp_test_utils.hpp @@ -11,37 +11,6 @@ namespace rrtmgpTest { bool file_exists(const char *filename); -#ifdef RRTMGP_ENABLE_YAKL -bool all_close(real2d &arr1, real2d &arr2, double tolerance); - -void dummy_clouds( - CloudOptics &cloud_optics, real2d &p_lay, real2d &t_lay, - real2d &lwp, real2d &iwp, real2d &rel, real2d &rei, real2d &cld -); - -void dummy_atmos( - std::string inputfile, - int ncol, real2d &p_lay, real2d &t_lay, - real1d &sfc_alb_dir_vis, real1d &sfc_alb_dir_nir, - real1d &sfc_alb_dif_vis, real1d &sfc_alb_dif_nir, - real1d &mu0, - real2d &lwp, real2d &iwp, real2d &rel, real2d &rei, real2d &cld -); - -void read_fluxes( - std::string inputfile, - real2d &sw_flux_up, real2d &sw_flux_dn, real2d &sw_flux_dir, - real2d &lw_flux_up, real2d &lw_flux_dn -); - -void write_fluxes( - std::string outputfile, - real2d &sw_flux_up, real2d &sw_flux_dn, real2d &sw_flux_dir, - real2d &lw_flux_up, real2d &lw_flux_dn -); -#endif - -#ifdef RRTMGP_ENABLE_KOKKOS template struct rrtmgp_test_utils { @@ -170,7 +139,6 @@ static void write_fluxes( } }; -#endif } #endif diff --git a/components/eamxx/src/physics/rrtmgp/rrtmgp_utils.hpp b/components/eamxx/src/physics/rrtmgp/rrtmgp_utils.hpp index 863597a4fea3..878a6841ec44 100644 --- a/components/eamxx/src/physics/rrtmgp/rrtmgp_utils.hpp +++ b/components/eamxx/src/physics/rrtmgp/rrtmgp_utils.hpp @@ -5,22 +5,9 @@ #include "cpp/rrtmgp_const.h" #include "cpp/rrtmgp_conversion.h" -#ifdef RRTMGP_ENABLE_YAKL -#include "YAKL.h" -#include "YAKL_Bounds_fortran.h" -#endif - namespace scream { namespace rrtmgp { -#ifdef RRTMGP_ENABLE_YAKL -// Things we need from YAKL -using yakl::intrinsics::maxval; -using yakl::intrinsics::minval; -using yakl::intrinsics::count; -using yakl::intrinsics::sum; -#endif - // Provide a routine to compute heating due to radiative fluxes. This is // computed as net flux into a layer, converted to a heating rate. It is // the responsibility of the user to ensure fields are passed with the @@ -31,24 +18,6 @@ using yakl::intrinsics::sum; // of approximating pdel by differencing the level interface pressures. // We are leaving this for the time being for consistency with SCREAMv0, // from which this code was directly ported. -#ifdef RRTMGP_ENABLE_YAKL -template -void compute_heating_rate ( - yakl::Array const &flux_up, yakl::Array const &flux_dn, - yakl::Array const &pdel , yakl::Array &heating_rate - ) { - using physconst = scream::physics::Constants; - auto ncol = flux_up.dimension[0]; - auto nlay = flux_up.dimension[1]-1; - TIMED_KERNEL(yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<2>(nlay,ncol), YAKL_LAMBDA(int ilay, int icol) { - heating_rate(icol,ilay) = ( - flux_up(icol,ilay+1) - flux_up(icol,ilay) - - flux_dn(icol,ilay+1) + flux_dn(icol,ilay) - ) * physconst::gravit / (physconst::Cpair * pdel(icol,ilay)); - })); -} -#endif -#ifdef RRTMGP_ENABLE_KOKKOS template void compute_heating_rate ( View1 const &flux_up, @@ -67,7 +36,6 @@ void compute_heating_rate ( ) * physconst::gravit / (physconst::Cpair * pdel(icol,ilay)); )); } -#endif inline bool radiation_do(const int irad, const int nstep) { // If irad == 0, then never do radiation; @@ -83,35 +51,6 @@ inline bool radiation_do(const int irad, const int nstep) { // Verify that array only contains values within valid range, and if not // report min and max of array -#ifdef RRTMGP_ENABLE_YAKL -template -bool check_range(T x, Real xmin, Real xmax, std::string msg, std::ostream& out=std::cout) { - bool pass = true; - auto _xmin = minval(x); - auto _xmax = maxval(x); - if (_xmin < xmin or _xmax > xmax) { - // How many outside range? - auto bad_mask = x.createDeviceCopy(); - memset(bad_mask, 0); - yakl::c::parallel_for(yakl::c::SimpleBounds<1>(x.totElems()), YAKL_LAMBDA (int i) { - if (x.data()[i] < xmin or x.data()[i] > xmax) { - bad_mask.data()[i] = 1; - } - }); - auto num_bad = sum(bad_mask); - if (num_bad > 0) { - pass = false; - out << msg << ": " - << num_bad << " values outside range " - << "[" << xmin << "," << xmax << "]" - << "; minval = " << _xmin - << "; maxval = " << _xmax << "\n"; - } - } - return pass; -} -#endif -#ifdef RRTMGP_ENABLE_KOKKOS template ::type* dummy = nullptr> bool check_range_k(T x, typename T::const_value_type xmin, typename T::const_value_type xmax, std::string msg, std::ostream& out=std::cout) { @@ -197,9 +136,6 @@ bool check_range_k(T x, typename T::const_value_type xmin, typename T::const_val return pass; } - -#endif - } // namespace rrtmgp } // namespace scream diff --git a/components/eamxx/src/physics/rrtmgp/tests/generate_baseline.cpp b/components/eamxx/src/physics/rrtmgp/tests/generate_baseline.cpp index 7d107aec284b..6b58b27c7249 100644 --- a/components/eamxx/src/physics/rrtmgp/tests/generate_baseline.cpp +++ b/components/eamxx/src/physics/rrtmgp/tests/generate_baseline.cpp @@ -7,10 +7,6 @@ #include #include -#ifdef RRTMGP_ENABLE_YAKL -#include -#endif - #include #include @@ -31,14 +27,6 @@ int main (int argc, char** argv) { using namespace ekat::logger; using logger_t = Logger; -#ifdef RRTMGP_ENABLE_YAKL - using r1d = real1d; - using r2d = real2d; - using r3d = real3d; - using gas_concs_t = GasConcs; - namespace utils_t = rrtmgpTest; - namespace interface_t = scream::rrtmgp; -#else using layout_t = Kokkos::LayoutLeft; using interface_t = scream::rrtmgp::rrtmgp_interface; using utils_t = rrtmgpTest::rrtmgp_test_utils; @@ -47,7 +35,6 @@ int main (int argc, char** argv) { using r2d = typename interface_t::real2dk; using r3d = typename interface_t::real3dk; using MDRP = typename interface_t::MDRP; -#endif ekat::Comm comm(MPI_COMM_WORLD); auto logger = std::make_shared("",LogLevel::info,comm); @@ -76,13 +63,8 @@ int main (int argc, char** argv) { utils_t::read_fluxes(inputfile, sw_flux_up_ref, sw_flux_dn_ref, sw_flux_dn_dir_ref, lw_flux_up_ref, lw_flux_dn_ref ); // Get dimension sizes -#ifdef RRTMGP_ENABLE_YAKL - const int ncol = sw_flux_up_ref.dimension[0]; - const int nlev = sw_flux_up_ref.dimension[1]; -#else const int ncol = sw_flux_up_ref.extent(0); const int nlev = sw_flux_up_ref.extent(1); -#endif const int nlay = nlev - 1; // Read in dummy Garand atmosphere; if this were an actual model simulation, @@ -103,11 +85,7 @@ int main (int argc, char** argv) { // Initialize the RRTMGP interface; this will read in the k-distribution // data that contains information about absorption coefficients for gases logger->info("rrtmgp_initialize..."); -#ifdef RRTMGP_ENABLE_YAKL - interface_t::rrtmgp_initialize(gas_concs, coefficients_file_sw, coefficients_file_lw, cloud_optics_file_sw, cloud_optics_file_lw, logger); -#else interface_t::rrtmgp_initialize(gas_concs, coefficients_file_sw, coefficients_file_lw, cloud_optics_file_sw, cloud_optics_file_lw, logger, 2.0); -#endif // Setup dummy all-sky problem r1d sfc_alb_dir_vis ("sfc_alb_dir_vis", ncol); @@ -132,13 +110,8 @@ int main (int argc, char** argv) { // input/outputs into the driver (persisting between calls), and // we would just have to setup the pointers to them in the // FluxesBroadband object -#ifdef RRTMGP_ENABLE_YAKL - const auto nswbands = scream::rrtmgp::k_dist_sw.get_nband(); - const auto nlwbands = scream::rrtmgp::k_dist_lw.get_nband(); -#else const auto nswbands = interface_t::k_dist_sw_k->get_nband(); const auto nlwbands = interface_t::k_dist_lw_k->get_nband(); -#endif r2d sw_flux_up ("sw_flux_up" , ncol, nlay+1); r2d sw_flux_dn ("sw_flux_dn" , ncol, nlay+1); r2d sw_flux_dn_dir("sw_flux_dn_dir", ncol, nlay+1); @@ -179,32 +152,19 @@ int main (int argc, char** argv) { auto aer_ssa_sw = r3d("aer_ssa_sw", ncol, nlay, nswbands); auto aer_asm_sw = r3d("aer_asm_sw", ncol, nlay, nswbands); auto aer_tau_lw = r3d("aer_tau_lw", ncol, nlay, nlwbands); -#ifdef RRTMGP_ENABLE_YAKL - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<3>(nswbands,nlay,ncol), YAKL_LAMBDA(int ibnd, int ilay, int icol) { -#else Kokkos::parallel_for( MDRP::template get<3>({nswbands,nlay,ncol}) , KOKKOS_LAMBDA (int ibnd, int ilay, int icol) { -#endif aer_tau_sw(icol,ilay,ibnd) = 0; aer_ssa_sw(icol,ilay,ibnd) = 0; aer_asm_sw(icol,ilay,ibnd) = 0; }); -#ifdef RRTMGP_ENABLE_YAKL - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<3>(nlwbands,nlay,ncol), YAKL_LAMBDA(int ibnd, int ilay, int icol) { -#else Kokkos::parallel_for( MDRP::template get<3>({nlwbands,nlay,ncol}) , KOKKOS_LAMBDA (int ibnd, int ilay, int icol) { -#endif aer_tau_lw(icol,ilay,ibnd) = 0; }); // These are returned as outputs now from rrtmgp_main // TODO: provide as inputs consistent with how aerosol is treated? -#ifdef RRTMGP_ENABLE_YAKL - const auto nswgpts = scream::rrtmgp::k_dist_sw.get_ngpt(); - const auto nlwgpts = scream::rrtmgp::k_dist_lw.get_ngpt(); -#else const auto nswgpts = interface_t::k_dist_sw_k->get_ngpt(); const auto nlwgpts = interface_t::k_dist_lw_k->get_ngpt(); -#endif auto cld_tau_sw_bnd = r3d("cld_tau_sw_bnd", ncol, nlay, nswbands); auto cld_tau_lw_bnd = r3d("cld_tau_lw_bnd", ncol, nlay, nlwbands); auto cld_tau_sw = r3d("cld_tau_sw", ncol, nlay, nswgpts); @@ -248,63 +208,6 @@ int main (int argc, char** argv) { // Clean up from test; this is probably not necessary, these things // should be deallocated when they fall out of scope, but we should be // good citizens and clean up our mess. -#ifdef RRTMGP_ENABLE_YAKL - p_lay.deallocate(); - t_lay.deallocate(); - p_lev.deallocate(); - t_lev.deallocate(); - col_dry.deallocate(); - sfc_alb_dir_vis.deallocate(); - sfc_alb_dir_nir.deallocate(); - sfc_alb_dif_vis.deallocate(); - sfc_alb_dif_nir.deallocate(); - sfc_alb_dir.deallocate(); - sfc_alb_dif.deallocate(); - mu0.deallocate(); - lwp.deallocate(); - iwp.deallocate(); - rel.deallocate(); - rei.deallocate(); - cld.deallocate(); - aer_tau_sw.deallocate(); - aer_ssa_sw.deallocate(); - aer_asm_sw.deallocate(); - aer_tau_lw.deallocate(); - cld_tau_sw.deallocate(); - cld_tau_lw.deallocate(); - cld_tau_sw_bnd.deallocate(); - cld_tau_lw_bnd.deallocate(); - sw_flux_up_ref.deallocate(); - sw_flux_dn_ref.deallocate(); - sw_flux_dn_dir_ref.deallocate(); - lw_flux_up_ref.deallocate(); - lw_flux_dn_ref.deallocate(); - sw_flux_up.deallocate(); - sw_flux_dn.deallocate(); - sw_flux_dn_dir.deallocate(); - lw_flux_up.deallocate(); - lw_flux_dn.deallocate(); - sw_clnclrsky_flux_up.deallocate(); - sw_clnclrsky_flux_dn.deallocate(); - sw_clnclrsky_flux_dn_dir.deallocate(); - sw_clrsky_flux_up.deallocate(); - sw_clrsky_flux_dn.deallocate(); - sw_clrsky_flux_dn_dir.deallocate(); - sw_clnsky_flux_up.deallocate(); - sw_clnsky_flux_dn.deallocate(); - sw_clnsky_flux_dn_dir.deallocate(); - lw_clnclrsky_flux_up.deallocate(); - lw_clnclrsky_flux_dn.deallocate(); - lw_clrsky_flux_up.deallocate(); - lw_clrsky_flux_dn.deallocate(); - lw_clnsky_flux_up.deallocate(); - lw_clnsky_flux_dn.deallocate(); - sw_bnd_flux_up.deallocate(); - sw_bnd_flux_dn.deallocate(); - sw_bnd_flux_dir.deallocate(); - lw_bnd_flux_up.deallocate(); - lw_bnd_flux_dn.deallocate(); -#endif gas_concs.reset(); interface_t::rrtmgp_finalize(); diff --git a/components/eamxx/src/physics/rrtmgp/tests/rrtmgp_tests.cpp b/components/eamxx/src/physics/rrtmgp/tests/rrtmgp_tests.cpp index bc5d9eca70f1..811a3bc61ddf 100644 --- a/components/eamxx/src/physics/rrtmgp/tests/rrtmgp_tests.cpp +++ b/components/eamxx/src/physics/rrtmgp/tests/rrtmgp_tests.cpp @@ -7,9 +7,6 @@ #include "cpp/rrtmgp/mo_gas_concentrations.h" #include "examples/all-sky/mo_garand_atmos_io.h" -#ifdef RRTMGP_ENABLE_YAKL -#include "YAKL.h" -#endif #include "ekat/util/ekat_test_utils.hpp" #include @@ -30,291 +27,6 @@ void expect_another_arg (int i, int argc) { EKAT_REQUIRE_MSG(i != argc-1, "Expected another cmd-line arg."); } -#ifdef RRTMGP_ENABLE_YAKL -int run_yakl(int argc, char** argv) { - using namespace ekat::logger; - using logger_t = Logger; - - ekat::Comm comm(MPI_COMM_WORLD); - auto logger = std::make_shared("",LogLevel::info,comm); - - // Parse command line arguments - if (argc < 3) { - std::string msg = "Missing required inputs. Usage:\n"; - msg += argv[0]; - msg += " -i -b [options]\n"; - logger->error(msg); - return 1; - } - std::string inputfile, baseline; - - for (int i = 1; i < argc-1; ++i) { - if (ekat::argv_matches(argv[i], "-b", "--baseline-file")) { - expect_another_arg(i, argc); - ++i; - baseline = argv[i]; - } - if (ekat::argv_matches(argv[i], "-i", "--input-file")) { - expect_another_arg(i, argc); - ++i; - inputfile = argv[i]; - } - // RRTMGP baselines tests to not use kokoks. Swallow the arg, but ignore it - if (std::string(argv[i])=="--kokkos-device-id=") { - continue; - } - } - - // Check to see that inputfiles exist - if (!rrtmgpTest::file_exists(inputfile.c_str())) { - logger->error("Inputfile " + inputfile + " does not exist.\n"); - return -1; - } - if (!rrtmgpTest::file_exists(baseline.c_str())) { - logger->error("Baseline " + baseline + " does not exist.\n"); - return -1; - } - - // Initialize yakl - logger->info("Initialize yakl...\n"); - yakl::init(); - - // Get reference fluxes from input file; do this here so we can get ncol dimension - logger->info("Read fluxes...\n"); - real2d sw_flux_up_ref; - real2d sw_flux_dn_ref; - real2d sw_flux_dir_ref; - real2d lw_flux_up_ref; - real2d lw_flux_dn_ref; - rrtmgpTest::read_fluxes(inputfile, sw_flux_up_ref, sw_flux_dn_ref, sw_flux_dir_ref, lw_flux_up_ref, lw_flux_dn_ref ); - - // Get dimension sizes - int ncol = sw_flux_up_ref.dimension[0]; - int nlev = sw_flux_up_ref.dimension[1]; - int nlay = nlev - 1; - - // Read in dummy Garand atmosphere; if this were an actual model simulation, - // these would be passed as inputs to the driver - // NOTE: set ncol to size of col_flx dimension in the input file. This is so - // that we can compare to the reference data provided in that file. Note that - // this will copy the first column of the input data (the first profile) ncol - // times. We will then fill some fraction of these columns with clouds for - // the test problem. - logger->info("Read dummy atmos...\n"); - real2d p_lay("p_lay", ncol, nlay); - real2d t_lay("t_lay", ncol, nlay); - real2d p_lev("p_lev", ncol, nlay+1); - real2d t_lev("t_lev", ncol, nlay+1); - real2d col_dry; - GasConcs gas_concs; - read_atmos(inputfile, p_lay, t_lay, p_lev, t_lev, gas_concs, col_dry, ncol); - - // Initialize absorption coefficients - logger->info("Initialize RRTMGP...\n"); - scream::rrtmgp::rrtmgp_initialize(gas_concs, coefficients_file_sw, coefficients_file_lw, cloud_optics_file_sw, cloud_optics_file_lw, logger); - - // Setup our dummy atmosphere based on the input data we read in - logger->info("Setup dummy atmos...\n"); - real1d sfc_alb_dir_vis("sfc_alb_dir_vis", ncol); - real1d sfc_alb_dir_nir("sfc_alb_dir_nir", ncol); - real1d sfc_alb_dif_vis("sfc_alb_dif_vis", ncol); - real1d sfc_alb_dif_nir("sfc_alb_dif_nir", ncol); - real1d mu0("mu0", ncol); - real2d lwp("lwp", ncol, nlay); - real2d iwp("iwp", ncol, nlay); - real2d rel("rel", ncol, nlay); - real2d rei("rei", ncol, nlay); - real2d cld("cld", ncol, nlay); - rrtmgpTest::dummy_atmos( - inputfile, ncol, p_lay, t_lay, - sfc_alb_dir_vis, sfc_alb_dir_nir, - sfc_alb_dif_vis, sfc_alb_dif_nir, - mu0, - lwp, iwp, rel, rei, cld - ); - - // Setup flux outputs; In a real model run, the fluxes would be - // input/outputs into the driver (persisting between calls), and - // we would just have to setup the pointers to them in the - // FluxesBroadband object - logger->info("Setup fluxes...\n"); - const auto nswbands = scream::rrtmgp::k_dist_sw.get_nband(); - const auto nlwbands = scream::rrtmgp::k_dist_lw.get_nband(); - real2d sw_flux_up ("sw_flux_up" , ncol, nlay+1); - real2d sw_flux_dn ("sw_flux_dn" , ncol, nlay+1); - real2d sw_flux_dir("sw_flux_dir", ncol, nlay+1); - real2d lw_flux_up ("lw_flux_up" , ncol, nlay+1); - real2d lw_flux_dn ("lw_flux_dn" , ncol, nlay+1); - real2d sw_clnclrsky_flux_up ("sw_clnclrsky_flux_up" , ncol, nlay+1); - real2d sw_clnclrsky_flux_dn ("sw_clnclrsky_flux_dn" , ncol, nlay+1); - real2d sw_clnclrsky_flux_dir("sw_clnclrsky_flux_dir", ncol, nlay+1); - real2d sw_clrsky_flux_up ("sw_clrsky_flux_up" , ncol, nlay+1); - real2d sw_clrsky_flux_dn ("sw_clrsky_flux_dn" , ncol, nlay+1); - real2d sw_clrsky_flux_dir("sw_clrsky_flux_dir", ncol, nlay+1); - real2d sw_clnsky_flux_up ("sw_clnsky_flux_up" , ncol, nlay+1); - real2d sw_clnsky_flux_dn ("sw_clnsky_flux_dn" , ncol, nlay+1); - real2d sw_clnsky_flux_dir("sw_clnsky_flux_dir", ncol, nlay+1); - real2d lw_clnclrsky_flux_up ("lw_clnclrsky_flux_up" , ncol, nlay+1); - real2d lw_clnclrsky_flux_dn ("lw_clnclrsky_flux_dn" , ncol, nlay+1); - real2d lw_clrsky_flux_up ("lw_clrsky_flux_up" , ncol, nlay+1); - real2d lw_clrsky_flux_dn ("lw_clrsky_flux_dn" , ncol, nlay+1); - real2d lw_clnsky_flux_up ("lw_clnsky_flux_up" , ncol, nlay+1); - real2d lw_clnsky_flux_dn ("lw_clnsky_flux_dn" , ncol, nlay+1); - real3d sw_bnd_flux_up ("sw_bnd_flux_up" , ncol, nlay+1, nswbands); - real3d sw_bnd_flux_dn ("sw_bnd_flux_dn" , ncol, nlay+1, nswbands); - real3d sw_bnd_flux_dir("sw_bnd_flux_dir", ncol, nlay+1, nswbands); - real3d lw_bnd_flux_up ("lw_bnd_flux_up" , ncol, nlay+1, nlwbands); - real3d lw_bnd_flux_dn ("lw_bnd_flux_dn" , ncol, nlay+1, nlwbands); - - // Compute band-by-band surface_albedos. - real2d sfc_alb_dir("sfc_alb_dir", ncol, nswbands); - real2d sfc_alb_dif("sfc_alb_dif", ncol, nswbands); - rrtmgp::compute_band_by_band_surface_albedos( - ncol, nswbands, - sfc_alb_dir_vis, sfc_alb_dir_nir, - sfc_alb_dif_vis, sfc_alb_dif_nir, - sfc_alb_dir, sfc_alb_dif); - - // Setup some dummy aerosol optical properties - auto aer_tau_sw = real3d("aer_tau_sw", ncol, nlay, nswbands); - auto aer_ssa_sw = real3d("aer_ssa_sw", ncol, nlay, nswbands); - auto aer_asm_sw = real3d("aer_asm_sw", ncol, nlay, nswbands); - auto aer_tau_lw = real3d("aer_tau_lw", ncol, nlay, nlwbands); - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<3>(nswbands,nlay,ncol), YAKL_LAMBDA(int ibnd, int ilay, int icol) { - aer_tau_sw(icol,ilay,ibnd) = 0; - aer_ssa_sw(icol,ilay,ibnd) = 0; - aer_asm_sw(icol,ilay,ibnd) = 0; - }); - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<3>(nlwbands,nlay,ncol), YAKL_LAMBDA(int ibnd, int ilay, int icol) { - aer_tau_lw(icol,ilay,ibnd) = 0; - }); - - // These are returned as outputs now from rrtmgp_main - // TODO: provide as inputs consistent with how aerosol is treated? - const auto nswgpts = scream::rrtmgp::k_dist_sw.get_ngpt(); - const auto nlwgpts = scream::rrtmgp::k_dist_lw.get_ngpt(); - auto cld_tau_sw_bnd = real3d("cld_tau_sw_bnd", ncol, nlay, nswbands); - auto cld_tau_lw_bnd = real3d("cld_tau_lw_bnd", ncol, nlay, nlwbands); - auto cld_tau_sw = real3d("cld_tau_sw", ncol, nlay, nswgpts); - auto cld_tau_lw = real3d("cld_tau_lw", ncol, nlay, nlwgpts); - - // Run RRTMGP code on dummy atmosphere - logger->info("Run RRTMGP...\n"); - const Real tsi_scaling = 1; - scream::rrtmgp::rrtmgp_main( - ncol, nlay, - p_lay, t_lay, p_lev, t_lev, gas_concs, - sfc_alb_dir, sfc_alb_dif, mu0, - lwp, iwp, rel, rei, cld, - aer_tau_sw, aer_ssa_sw, aer_asm_sw, aer_tau_lw, - cld_tau_sw_bnd, cld_tau_lw_bnd, // outputs - cld_tau_sw, cld_tau_lw, // outputs - sw_flux_up, sw_flux_dn, sw_flux_dir, - lw_flux_up, lw_flux_dn, - sw_clnclrsky_flux_up, sw_clnclrsky_flux_dn, sw_clnclrsky_flux_dir, - sw_clrsky_flux_up, sw_clrsky_flux_dn, sw_clrsky_flux_dir, - sw_clnsky_flux_up, sw_clnsky_flux_dn, sw_clnsky_flux_dir, - lw_clnclrsky_flux_up, lw_clnclrsky_flux_dn, - lw_clrsky_flux_up, lw_clrsky_flux_dn, - lw_clnsky_flux_up, lw_clnsky_flux_dn, - sw_bnd_flux_up, sw_bnd_flux_dn, sw_bnd_flux_dir, - lw_bnd_flux_up, lw_bnd_flux_dn, tsi_scaling, logger, - true, true // extra_clnclrsky_diag, extra_clnsky_diag - // set them both to true because we are testing them below - ); - - // Check values against baseline - logger->info("Check values...\n"); - rrtmgpTest::read_fluxes( - baseline, - sw_flux_up_ref, sw_flux_dn_ref, sw_flux_dir_ref, - lw_flux_up_ref, lw_flux_dn_ref - ); - int nerr = 0; - if (!rrtmgpTest::all_close(sw_flux_up_ref , sw_flux_up , 0.001)) nerr++; - if (!rrtmgpTest::all_close(sw_flux_dn_ref , sw_flux_dn , 0.001)) nerr++; - if (!rrtmgpTest::all_close(sw_flux_dir_ref, sw_flux_dir, 0.001)) nerr++; - if (!rrtmgpTest::all_close(lw_flux_up_ref , lw_flux_up , 0.001)) nerr++; - if (!rrtmgpTest::all_close(lw_flux_dn_ref , lw_flux_dn , 0.001)) nerr++; - - // Because the aerosol optical properties are all set to zero, these fluxes must be equal - if (!rrtmgpTest::all_close(sw_flux_up , sw_clnsky_flux_up , 0.0000000001)) nerr++; - if (!rrtmgpTest::all_close(sw_clrsky_flux_up , sw_clnclrsky_flux_up , 0.0000000001)) nerr++; - if (!rrtmgpTest::all_close(sw_flux_dn , sw_clnsky_flux_dn , 0.0000000001)) nerr++; - if (!rrtmgpTest::all_close(sw_clrsky_flux_dn , sw_clnclrsky_flux_dn , 0.0000000001)) nerr++; - if (!rrtmgpTest::all_close(sw_flux_dir , sw_clnsky_flux_dir , 0.0000000001)) nerr++; - if (!rrtmgpTest::all_close(sw_clrsky_flux_dir , sw_clnclrsky_flux_dir , 0.0000000001)) nerr++; - if (!rrtmgpTest::all_close(lw_flux_up , lw_clnsky_flux_up , 0.0000000001)) nerr++; - if (!rrtmgpTest::all_close(lw_clrsky_flux_up , lw_clnclrsky_flux_up , 0.0000000001)) nerr++; - if (!rrtmgpTest::all_close(lw_flux_dn , lw_clnsky_flux_dn , 0.0000000001)) nerr++; - if (!rrtmgpTest::all_close(lw_clrsky_flux_dn , lw_clnclrsky_flux_dn , 0.0000000001)) nerr++; - - logger->info("Cleaning up...\n"); - // Clean up or else YAKL will throw errors - scream::rrtmgp::rrtmgp_finalize(); - sw_flux_up_ref.deallocate(); - sw_flux_dn_ref.deallocate(); - sw_flux_dir_ref.deallocate(); - lw_flux_up_ref.deallocate(); - lw_flux_dn_ref.deallocate(); - sw_flux_up.deallocate(); - sw_flux_dn.deallocate(); - sw_flux_dir.deallocate(); - lw_flux_up.deallocate(); - lw_flux_dn.deallocate(); - sw_clnclrsky_flux_up.deallocate(); - sw_clnclrsky_flux_dn.deallocate(); - sw_clnclrsky_flux_dir.deallocate(); - sw_clrsky_flux_up.deallocate(); - sw_clrsky_flux_dn.deallocate(); - sw_clrsky_flux_dir.deallocate(); - sw_clnsky_flux_up.deallocate(); - sw_clnsky_flux_dn.deallocate(); - sw_clnsky_flux_dir.deallocate(); - lw_clnclrsky_flux_up.deallocate(); - lw_clnclrsky_flux_dn.deallocate(); - lw_clrsky_flux_up.deallocate(); - lw_clrsky_flux_dn.deallocate(); - lw_clnsky_flux_up.deallocate(); - lw_clnsky_flux_dn.deallocate(); - sw_bnd_flux_up.deallocate(); - sw_bnd_flux_dn.deallocate(); - sw_bnd_flux_dir.deallocate(); - lw_bnd_flux_up.deallocate(); - lw_bnd_flux_dn.deallocate(); - p_lay.deallocate(); - t_lay.deallocate(); - p_lev.deallocate(); - t_lev.deallocate(); - gas_concs.reset(); - sfc_alb_dir_vis.deallocate(); - sfc_alb_dir_nir.deallocate(); - sfc_alb_dif_vis.deallocate(); - sfc_alb_dif_nir.deallocate(); - sfc_alb_dir.deallocate(); - sfc_alb_dif.deallocate(); - mu0.deallocate(); - lwp.deallocate(); - iwp.deallocate(); - rel.deallocate(); - rei.deallocate(); - cld.deallocate(); - aer_tau_sw.deallocate(); - aer_ssa_sw.deallocate(); - aer_asm_sw.deallocate(); - aer_tau_lw.deallocate(); - cld_tau_sw.deallocate(); - cld_tau_lw.deallocate(); - cld_tau_sw_bnd.deallocate(); - cld_tau_lw_bnd.deallocate(); - col_dry.deallocate(); - yakl::finalize(); - - return nerr != 0 ? 1 : 0; -} // end of main driver code -#endif - -#ifdef RRTMGP_ENABLE_KOKKOS int run_kokkos(int argc, char** argv) { using namespace ekat::logger; using logger_t = Logger; @@ -365,8 +77,8 @@ int run_kokkos(int argc, char** argv) { return -1; } - // Initialize yakl - logger->info("Initialize yakl...\n"); + // Initialize kokkos + logger->info("Initialize kokkos...\n"); scream::init_kls(); // Get reference fluxes from input file; do this here so we can get ncol dimension @@ -540,13 +252,12 @@ int run_kokkos(int argc, char** argv) { if (!utils_t::all_close(lw_clrsky_flux_dn , lw_clnclrsky_flux_dn , 0.0000000001)) nerr++; logger->info("Cleaning up...\n"); - // Clean up or else YAKL will throw errors + // Clean up or else Kokkos will throw errors interface_t::rrtmgp_finalize(); scream::finalize_kls(); return nerr != 0 ? 1 : 0; } // end of main driver code -#endif } @@ -554,12 +265,7 @@ int main(int argc, char** argv) { MPI_Init(&argc,&argv); int ret = 0; -#ifdef RRTMGP_ENABLE_YAKL - ret += run_yakl(argc,argv); -#endif -#ifdef RRTMGP_ENABLE_KOKKOS ret += run_kokkos(argc,argv); -#endif MPI_Finalize(); return ret; diff --git a/components/eamxx/src/physics/rrtmgp/tests/rrtmgp_unit_tests.cpp b/components/eamxx/src/physics/rrtmgp/tests/rrtmgp_unit_tests.cpp index b179533b4eaf..b6ada0baed4b 100644 --- a/components/eamxx/src/physics/rrtmgp/tests/rrtmgp_unit_tests.cpp +++ b/components/eamxx/src/physics/rrtmgp/tests/rrtmgp_unit_tests.cpp @@ -5,19 +5,13 @@ #include "physics/rrtmgp/shr_orb_mod_c2f.hpp" #include "mo_load_coefficients.h" -#ifdef RRTMGP_ENABLE_YAKL -#include "YAKL.h" -#endif - namespace { -#ifdef RRTMGP_ENABLE_KOKKOS template auto chc(const View& view) { return Kokkos::create_mirror_view_and_copy(HostDevice(), view); } -#endif // Names of input files we will need. std::string coefficients_file_sw = SCREAM_DATA_DIR "/init/rrtmgp-data-sw-g112-210809.nc"; @@ -25,830 +19,6 @@ std::string coefficients_file_lw = SCREAM_DATA_DIR "/init/rrtmgp-data-lw-g128-21 std::string cloud_optics_file_sw = SCREAM_DATA_DIR "/init/rrtmgp-cloud-optics-coeffs-sw.nc"; std::string cloud_optics_file_lw = SCREAM_DATA_DIR "/init/rrtmgp-cloud-optics-coeffs-lw.nc"; -#ifdef RRTMGP_ENABLE_YAKL -TEST_CASE("rrtmgp_test_heating") { - // Initialize YAKL - if (!yakl::isInitialized()) { yakl::init(); } - - // Test heating rate function by passing simple inputs - auto dp = real2d("dp", 1, 1); - auto flux_up = real2d("flux_up", 1, 2); - auto flux_dn = real2d("flux_dn", 1, 2); - auto heating = real2d("heating", 1, 1); - // Simple no-heating test - // NOTE: yakl::fortran::parallel_for because we need to do these in a kernel on the device - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - dp(1, 1) = 10; - flux_up(1, 1) = 1.0; - flux_up(1, 2) = 1.0; - flux_dn(1, 1) = 1.0; - flux_dn(1, 2) = 1.0; - }); - scream::rrtmgp::compute_heating_rate(flux_up, flux_dn, dp, heating); - REQUIRE(heating.createHostCopy()(1,1) == 0); - - // Simple net postive heating; net flux into layer should be 1.0 - // NOTE: yakl::fortran::parallel_for because we need to do these in a kernel on the device - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - flux_up(1, 1) = 1.0; - flux_up(1, 2) = 1.0; - flux_dn(1, 1) = 1.5; - flux_dn(1, 2) = 0.5; - }); - using physconst = scream::physics::Constants; - auto g = physconst::gravit; //9.81; - auto cp_air = physconst::Cpair; //1005.0; - auto pdel = dp.createHostCopy()(1,1); - auto heating_ref = 1.0 * g / (cp_air * pdel); - scream::rrtmgp::compute_heating_rate(flux_up, flux_dn, dp, heating); - REQUIRE(heating.createHostCopy()(1,1) == heating_ref); - - // Simple net negative heating; net flux into layer should be -1.0 - // NOTE: yakl::fortran::parallel_for because we need to do these in a kernel on the device - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - flux_up(1,1) = 1.5; - flux_up(1,2) = 0.5; - flux_dn(1,1) = 1.0; - flux_dn(1,2) = 1.0; - }); - heating_ref = -1.0 * g / (cp_air * pdel); - scream::rrtmgp::compute_heating_rate(flux_up, flux_dn, dp, heating); - REQUIRE(heating.createHostCopy()(1,1) == heating_ref); - - // Clean up - dp.deallocate(); - flux_up.deallocate(); - flux_dn.deallocate(); - heating.deallocate(); - yakl::finalize(); -} - -TEST_CASE("rrtmgp_test_mixing_ratio_to_cloud_mass") { - // Initialize YAKL - if (!yakl::isInitialized()) { yakl::init(); } - - using physconst = scream::physics::Constants; - - // Test mixing ratio to cloud mass function by passing simple inputs - auto dp = real2d("dp", 1, 1); - auto mixing_ratio = real2d("mixing_ratio", 1, 1); - auto cloud_fraction = real2d("cloud_fration", 1, 1); - auto cloud_mass = real2d("cloud_mass", 1, 1); - - // Test with cell completely filled with cloud - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - dp(1,1) = 10.0; - mixing_ratio(1,1) = 0.0001; - cloud_fraction(1,1) = 1.0; - }); - auto cloud_mass_ref = mixing_ratio.createHostCopy()(1,1) / cloud_fraction.createHostCopy()(1,1) * dp.createHostCopy()(1,1) / physconst::gravit; - scream::rrtmgp::mixing_ratio_to_cloud_mass(mixing_ratio, cloud_fraction, dp, cloud_mass); - REQUIRE(cloud_mass.createHostCopy()(1,1) == cloud_mass_ref); - - // Test with no cloud - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - dp(1,1) = 10.0; - mixing_ratio(1,1) = 0.0; - cloud_fraction(1,1) = 0.0; - }); - cloud_mass_ref = 0.0; - scream::rrtmgp::mixing_ratio_to_cloud_mass(mixing_ratio, cloud_fraction, dp, cloud_mass); - REQUIRE(cloud_mass.createHostCopy()(1,1) == cloud_mass_ref); - - // Test with empty clouds (cloud fraction but with no associated mixing ratio) - // This case could happen if we use a total cloud fraction, but compute layer - // cloud mass separately for liquid and ice. - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - dp(1,1) = 10.0; - mixing_ratio(1,1) = 0.0; - cloud_fraction(1,1) = 0.1; - }); - cloud_mass_ref = 0.0; - scream::rrtmgp::mixing_ratio_to_cloud_mass(mixing_ratio, cloud_fraction, dp, cloud_mass); - REQUIRE(cloud_mass.createHostCopy()(1,1) == cloud_mass_ref); - - // Test with cell half filled with cloud - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - dp(1,1) = 10.0; - mixing_ratio(1,1) = 0.0001; - cloud_fraction(1,1) = 0.5; - }); - cloud_mass_ref = mixing_ratio.createHostCopy()(1,1) / cloud_fraction.createHostCopy()(1,1) * dp.createHostCopy()(1,1) / physconst::gravit; - scream::rrtmgp::mixing_ratio_to_cloud_mass(mixing_ratio, cloud_fraction, dp, cloud_mass); - REQUIRE(cloud_mass.createHostCopy()(1,1) == cloud_mass_ref); - - // Clean up - dp.deallocate(); - mixing_ratio.deallocate(); - cloud_fraction.deallocate(); - cloud_mass.deallocate(); - yakl::finalize(); -} - -TEST_CASE("rrtmgp_test_limit_to_bounds") { - // Initialize YAKL - if (!yakl::isInitialized()) { yakl::init(); } - - // Test limiter function - auto arr = real2d("arr", 2, 2); - auto arr_limited = real2d("arr_limited", 2, 2); - - // Setup dummy array - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - arr(1,1) = 1.0; - arr(1,2) = 2.0; - arr(2,1) = 3.0; - arr(2,2) = 4.0; - }); - - // Limit to bounds that contain the data; should be no change in values - scream::rrtmgp::limit_to_bounds(arr, 0.0, 5.0, arr_limited); - REQUIRE(arr.createHostCopy()(1,1) == arr_limited.createHostCopy()(1,1)); - REQUIRE(arr.createHostCopy()(1,2) == arr_limited.createHostCopy()(1,2)); - REQUIRE(arr.createHostCopy()(2,1) == arr_limited.createHostCopy()(2,1)); - REQUIRE(arr.createHostCopy()(2,2) == arr_limited.createHostCopy()(2,2)); - - // Limit to bounds that do not completely contain the data; should be a change in values! - scream::rrtmgp::limit_to_bounds(arr, 1.5, 3.5, arr_limited); - REQUIRE(arr_limited.createHostCopy()(1,1) == 1.5); - REQUIRE(arr_limited.createHostCopy()(1,2) == 2.0); - REQUIRE(arr_limited.createHostCopy()(2,1) == 3.0); - REQUIRE(arr_limited.createHostCopy()(2,2) == 3.5); - arr.deallocate(); - arr_limited.deallocate(); - yakl::finalize(); -} - -TEST_CASE("rrtmgp_test_zenith") { - - // Create some dummy data - int orbital_year = 1990; - double calday = 1.0000000000000000; - double eccen_ref = 1.6707719799280658E-002; - double mvelpp_ref = 4.9344679089867318; - double lambm0_ref = -3.2503635878519378E-002; - double obliqr_ref = 0.40912382465788016; - double delta_ref = -0.40302893695478670; - double eccf_ref = 1.0342222039093694; - double lat = -7.7397590528644963E-002; - double lon = 2.2584340271163548; - double coszrs_ref = 0.61243613606766745; - - // Test shr_orb_params() - // Get orbital parameters based on calendar day - double eccen; - double obliq; // obliquity in degrees - double mvelp; // moving vernal equinox long of perihelion; degrees? - double obliqr; - double lambm0; - double mvelpp; - // bool flag_print = false; - shr_orb_params_c2f(&orbital_year, &eccen, &obliq, &mvelp, - &obliqr, &lambm0, &mvelpp); //, flag_print); // Note fortran code has optional arg - REQUIRE(eccen == eccen_ref); - REQUIRE(obliqr == obliqr_ref); - REQUIRE(mvelpp == mvelpp_ref); - REQUIRE(lambm0 == lambm0_ref); - REQUIRE(mvelpp == mvelpp_ref); - - // Test shr_orb_decl() - double delta; - double eccf; - shr_orb_decl_c2f(calday, eccen, mvelpp, lambm0, - obliqr, &delta, &eccf); - REQUIRE(delta == delta_ref); - REQUIRE(eccf == eccf_ref ); - - double dt_avg = 0.; //3600.0000000000000; - double coszrs = shr_orb_cosz_c2f(calday, lat, lon, delta, dt_avg); - REQUIRE(std::abs(coszrs-coszrs_ref)<1e-14); - - // Another case, this time WITH dt_avg flag: - calday = 1.0833333333333333; - eccen = 1.6707719799280658E-002; - mvelpp = 4.9344679089867318; - lambm0 = -3.2503635878519378E-002; - obliqr = 0.40912382465788016; - delta = -0.40292121709083456; - eccf = 1.0342248931660425; - lat = -1.0724153591027763; - lon = 4.5284876076962712; - dt_avg = 3600.0000000000000; - coszrs_ref = 0.14559973262047626; - coszrs = shr_orb_cosz_c2f(calday, lat, lon, delta, dt_avg); - REQUIRE(std::abs(coszrs-coszrs_ref)<1e-14); - -} - -TEST_CASE("rrtmgp_test_compute_broadband_surface_flux") { - using namespace ekat::logger; - using logger_t = Logger; - - ekat::Comm comm(MPI_COMM_WORLD); - auto logger = std::make_shared("",LogLevel::info,comm); - - // Initialize YAKL - if (!yakl::isInitialized()) { yakl::init(); } - - // Create arrays - const int ncol = 1; - const int nlay = 1; - const int nbnd = 14; - const int kbot = nlay + 1; - auto sfc_flux_dir_nir = real1d("sfc_flux_dir_nir", ncol); - auto sfc_flux_dir_vis = real1d("sfc_flux_dir_vis", ncol); - auto sfc_flux_dif_nir = real1d("sfc_flux_dif_nir", ncol); - auto sfc_flux_dif_vis = real1d("sfc_flux_dif_vis", ncol); - - // Need to initialize RRTMGP with dummy gases - logger->info("Init gases...\n"); - GasConcs gas_concs; - string1dv gas_names = {"h2o", "co2", "o3", "n2o", "co", "ch4", "o2", "n2"}; - gas_concs.init(gas_names,ncol,nlay); - logger->info("Init RRTMGP...\n"); - scream::rrtmgp::rrtmgp_initialize(gas_concs, coefficients_file_sw, coefficients_file_lw, cloud_optics_file_sw, cloud_optics_file_lw, logger); - - // Create simple test cases; We expect, given the input data, that band 10 - // will straddle the NIR and VIS, bands 1-9 will be purely NIR, and bands 11-14 - // will be purely VIS. The implementation in EAMF90 was hard-coded with this - // band information, but our implementation of compute_broadband_surface_fluxes - // actually checks the wavenumber limits. These tests will mostly check to make - // sure our implementation of that is doing what we think it is. - - // --------------------------------- - // Test case: flux only in straddled band - auto sw_bnd_flux_dir = real3d("sw_bnd_flux_dir", ncol, nlay+1, nbnd); - auto sw_bnd_flux_dif = real3d("sw_bnd_flux_dif", ncol, nlay+1, nbnd); - logger->info("Populate band-resolved 3d fluxes for test case with only transition band flux...\n"); - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<3>(nbnd,nlay+1,ncol), YAKL_LAMBDA(int ibnd, int ilay, int icol) { - if (ibnd < 10) { - sw_bnd_flux_dir(icol,ilay,ibnd) = 0; - sw_bnd_flux_dif(icol,ilay,ibnd) = 0; - } else if (ibnd == 10) { - sw_bnd_flux_dir(icol,ilay,ibnd) = 1; - sw_bnd_flux_dif(icol,ilay,ibnd) = 1; - } else { - sw_bnd_flux_dir(icol,ilay,ibnd) = 0; - sw_bnd_flux_dif(icol,ilay,ibnd) = 0; - } - }); - // Compute surface fluxes - logger->info("Compute broadband surface fluxes...\n"); - scream::rrtmgp::compute_broadband_surface_fluxes( - ncol, kbot, nbnd, - sw_bnd_flux_dir, sw_bnd_flux_dif, - sfc_flux_dir_vis, sfc_flux_dir_nir, - sfc_flux_dif_vis, sfc_flux_dif_nir - ); - // Check computed surface fluxes - logger->info("Check computed fluxes...\n"); - const double tol = 1e-10; // tolerance on floating point inequality for assertions - REQUIRE(std::abs(sfc_flux_dir_nir.createHostCopy()(1) - 0.5) < tol); - REQUIRE(std::abs(sfc_flux_dir_vis.createHostCopy()(1) - 0.5) < tol); - REQUIRE(std::abs(sfc_flux_dif_nir.createHostCopy()(1) - 0.5) < tol); - REQUIRE(std::abs(sfc_flux_dif_vis.createHostCopy()(1) - 0.5) < tol); - // --------------------------------- - - // --------------------------------- - // Test case, only flux in NIR bands - logger->info("Populate band-resolved 3d fluxes for test case with only NIR flux...\n"); - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<3>(nbnd,nlay+1,ncol), YAKL_LAMBDA(int ibnd, int ilay, int icol) { - if (ibnd < 10) { - sw_bnd_flux_dir(icol,ilay,ibnd) = 1; - sw_bnd_flux_dif(icol,ilay,ibnd) = 1; - } else if (ibnd == 10) { - sw_bnd_flux_dir(icol,ilay,ibnd) = 0; - sw_bnd_flux_dif(icol,ilay,ibnd) = 0; - } else { - sw_bnd_flux_dir(icol,ilay,ibnd) = 0; - sw_bnd_flux_dif(icol,ilay,ibnd) = 0; - } - }); - // Compute surface fluxes - logger->info("Compute broadband surface fluxes...\n"); - scream::rrtmgp::compute_broadband_surface_fluxes( - ncol, kbot, nbnd, - sw_bnd_flux_dir, sw_bnd_flux_dif, - sfc_flux_dir_vis, sfc_flux_dir_nir, - sfc_flux_dif_vis, sfc_flux_dif_nir - ); - // Check computed surface fluxes - logger->info("Check computed fluxes...\n"); - REQUIRE(std::abs(sfc_flux_dir_nir.createHostCopy()(1) - 9.0) < tol); - REQUIRE(std::abs(sfc_flux_dir_vis.createHostCopy()(1) - 0.0) < tol); - REQUIRE(std::abs(sfc_flux_dif_nir.createHostCopy()(1) - 9.0) < tol); - REQUIRE(std::abs(sfc_flux_dif_vis.createHostCopy()(1) - 0.0) < tol); - // --------------------------------- - - // --------------------------------- - // Test case, only flux in VIS bands - logger->info("Populate band-resolved 3d fluxes for test case with only VIS/UV flux...\n"); - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<3>(nbnd,nlay+1,ncol), YAKL_LAMBDA(int ibnd, int ilay, int icol) { - if (ibnd < 10) { - sw_bnd_flux_dir(icol,ilay,ibnd) = 0; - sw_bnd_flux_dif(icol,ilay,ibnd) = 0; - } else if (ibnd == 10) { - sw_bnd_flux_dir(icol,ilay,ibnd) = 0; - sw_bnd_flux_dif(icol,ilay,ibnd) = 0; - } else { - sw_bnd_flux_dir(icol,ilay,ibnd) = 1; - sw_bnd_flux_dif(icol,ilay,ibnd) = 1; - } - }); - // Compute surface fluxes - logger->info("Compute broadband surface fluxes...\n"); - scream::rrtmgp::compute_broadband_surface_fluxes( - ncol, kbot, nbnd, - sw_bnd_flux_dir, sw_bnd_flux_dif, - sfc_flux_dir_vis, sfc_flux_dir_nir, - sfc_flux_dif_vis, sfc_flux_dif_nir - ); - // Check computed surface fluxes - logger->info("Check computed fluxes...\n"); - REQUIRE(std::abs(sfc_flux_dir_nir.createHostCopy()(1) - 0.0) < tol); - REQUIRE(std::abs(sfc_flux_dir_vis.createHostCopy()(1) - 4.0) < tol); - REQUIRE(std::abs(sfc_flux_dif_nir.createHostCopy()(1) - 0.0) < tol); - REQUIRE(std::abs(sfc_flux_dif_vis.createHostCopy()(1) - 4.0) < tol); - // --------------------------------- - - // --------------------------------- - // Test case, only flux in all bands - logger->info("Populate band-resolved 3d fluxes for test with non-zero flux in all bands...\n"); - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<3>(nbnd,nlay+1,ncol), YAKL_LAMBDA(int ibnd, int ilay, int icol) { - if (ibnd < 10) { - sw_bnd_flux_dir(icol,ilay,ibnd) = 1.0; - sw_bnd_flux_dif(icol,ilay,ibnd) = 2.0; - } else if (ibnd == 10) { - sw_bnd_flux_dir(icol,ilay,ibnd) = 3.0; - sw_bnd_flux_dif(icol,ilay,ibnd) = 4.0; - } else { - sw_bnd_flux_dir(icol,ilay,ibnd) = 5.0; - sw_bnd_flux_dif(icol,ilay,ibnd) = 6.0; - } - }); - // Compute surface fluxes - logger->info("Compute broadband surface fluxes...\n"); - scream::rrtmgp::compute_broadband_surface_fluxes( - ncol, kbot, nbnd, - sw_bnd_flux_dir, sw_bnd_flux_dif, - sfc_flux_dir_vis, sfc_flux_dir_nir, - sfc_flux_dif_vis, sfc_flux_dif_nir - ); - // Check computed surface fluxes - logger->info("Check computed fluxes...\n"); - REQUIRE(std::abs(sfc_flux_dir_nir.createHostCopy()(1) - 10.5) < tol); - REQUIRE(std::abs(sfc_flux_dir_vis.createHostCopy()(1) - 21.5) < tol); - REQUIRE(std::abs(sfc_flux_dif_nir.createHostCopy()(1) - 20.0) < tol); - REQUIRE(std::abs(sfc_flux_dif_vis.createHostCopy()(1) - 26.0) < tol); - // --------------------------------- - - // Finalize YAKL - logger->info("Free memory...\n"); - scream::rrtmgp::rrtmgp_finalize(); - gas_concs.reset(); - sw_bnd_flux_dir.deallocate(); - sw_bnd_flux_dif.deallocate(); - sfc_flux_dir_nir.deallocate(); - sfc_flux_dir_vis.deallocate(); - sfc_flux_dif_nir.deallocate(); - sfc_flux_dif_vis.deallocate(); - if (yakl::isInitialized()) { yakl::finalize(); } -} - -TEST_CASE("rrtmgp_test_radiation_do") { - // If we specify rad every step, radiation_do should always be true - REQUIRE(scream::rrtmgp::radiation_do(1, 0) == true); - REQUIRE(scream::rrtmgp::radiation_do(1, 1) == true); - REQUIRE(scream::rrtmgp::radiation_do(1, 2) == true); - - // Test cases where we want rad called every other step - REQUIRE(scream::rrtmgp::radiation_do(2, 0) == true); - REQUIRE(scream::rrtmgp::radiation_do(2, 1) == false); - REQUIRE(scream::rrtmgp::radiation_do(2, 2) == true); - REQUIRE(scream::rrtmgp::radiation_do(2, 3) == false); - - // Test cases where we want rad every third step - REQUIRE(scream::rrtmgp::radiation_do(3, 0) == true); - REQUIRE(scream::rrtmgp::radiation_do(3, 1) == false); - REQUIRE(scream::rrtmgp::radiation_do(3, 2) == false); - REQUIRE(scream::rrtmgp::radiation_do(3, 3) == true); - REQUIRE(scream::rrtmgp::radiation_do(3, 4) == false); - REQUIRE(scream::rrtmgp::radiation_do(3, 5) == false); - REQUIRE(scream::rrtmgp::radiation_do(3, 6) == true); -} - -TEST_CASE("rrtmgp_test_check_range") { - // Initialize YAKL - if (!yakl::isInitialized()) { yakl::init(); } - // Create some dummy data and test with both values inside valid range and outside - auto dummy = real2d("dummy", 2, 1); - // All values within range - memset(dummy, 0.1); - REQUIRE(scream::rrtmgp::check_range(dummy, 0.0, 1.0, "dummy") == true); - // At least one value below lower bound - yakl::fortran::parallel_for(1, YAKL_LAMBDA (int i) {dummy(i, 1) = -0.1;}); - REQUIRE(scream::rrtmgp::check_range(dummy, 0.0, 1.0, "dummy") == false); - // At least one value above upper bound - yakl::fortran::parallel_for(1, YAKL_LAMBDA (int i) {dummy(i, 1) = 1.1;}); - REQUIRE(scream::rrtmgp::check_range(dummy, 0.0, 1.0, "dummy") == false); - dummy.deallocate(); - if (yakl::isInitialized()) { yakl::finalize(); } -} - -TEST_CASE("rrtmgp_test_subcol_gen") { - // Initialize YAKL - if (!yakl::isInitialized()) { yakl::init(); } - // Create dummy data - const int ncol = 1; - const int nlay = 4; - const int ngpt = 10; - auto cldfrac = real2d("cldfrac", ncol, nlay); - // Set cldfrac values - memset(cldfrac, 0.0); - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - cldfrac(1,1) = 1; - cldfrac(1,2) = 0.5; - cldfrac(1,3) = 0; - cldfrac(1,4) = 1; - }); - auto cldmask = int3d("cldmask", ncol, nlay, ngpt); - auto cldfrac_from_mask = real2d("cldfrac_from_mask", ncol, nlay); - // Run subcol gen, make sure we get what we expect; do this for some different seed values - for (unsigned seed = 1; seed <= 10; seed++) { - auto seeds = int1d("seeds", ncol); - memset(seeds, seed); - cldmask = scream::rrtmgp::get_subcolumn_mask(ncol, nlay, ngpt, cldfrac, 1, seeds); - // Check answers by computing new cldfrac from mask - memset(cldfrac_from_mask, 0.0); - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<2>(nlay,ncol), YAKL_LAMBDA(int ilay, int icol) { - for (int igpt = 1; igpt <= ngpt; ++igpt) { - real cldmask_real = cldmask(icol,ilay,igpt); - cldfrac_from_mask(icol,ilay) += cldmask_real; - } - }); - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<2>(nlay,ncol), YAKL_LAMBDA(int ilay, int icol) { - cldfrac_from_mask(icol,ilay) = cldfrac_from_mask(icol,ilay) / ngpt; - }); - // For cldfrac 1 we should get 1, for cldfrac 0 we should get 0, but in between we cannot be sure - // deterministically, since the computed cloud mask depends on pseudo-random numbers - REQUIRE(cldfrac_from_mask.createHostCopy()(1,1) == 1); - REQUIRE(cldfrac_from_mask.createHostCopy()(1,2) <= 1); - REQUIRE(cldfrac_from_mask.createHostCopy()(1,3) == 0); - REQUIRE(cldfrac_from_mask.createHostCopy()(1,4) == 1); - } - - // For maximum-random overlap, vertically-contiguous layers maximimally overlap, - // thus if we have non-zero cloud fraction in two adjacent layers, then every subcolumn - // that has cloud in the layer above must also have cloud in the layer below; test - // this property by creating two layers with non-zero cloud fraction, creating subcolums, - // and verifying that every subcolumn with cloud in layer 1 has cloud in layer 2 - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - cldfrac(1,1) = 0.5; - cldfrac(1,2) = 0.5; - cldfrac(1,3) = 0; - cldfrac(1,4) = 0; - }); - for (unsigned seed = 1; seed <= 10; seed++) { - auto seeds = int1d("seeds", ncol); - memset(seeds, seed); - cldmask = scream::rrtmgp::get_subcolumn_mask(ncol, nlay, ngpt, cldfrac, 1, seeds); - auto cldmask_h = cldmask.createHostCopy(); - for (int igpt = 1; igpt <= ngpt; igpt++) { - if (cldmask_h(1,1,igpt) == 1) { - REQUIRE(cldmask_h(1,2,igpt) == 1); - } - } - } - // Clean up after test - cldfrac.deallocate(); - cldmask.deallocate(); - cldfrac_from_mask.deallocate(); - yakl::finalize(); -} - - -TEST_CASE("rrtmgp_cloud_area") { - // Initialize YAKL - if (!yakl::isInitialized()) { yakl::init(); } - // Create dummy data - const int ncol = 1; - const int nlay = 2; - const int ngpt = 3; - auto cldtau = real3d("cldtau", ncol, nlay, ngpt); - auto cldtot = real1d("cldtot", ncol); - auto pmid = real2d("pmid", ncol, nlay); - - // Set up pressure levels for test problem - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - pmid(1,1) = 100; - pmid(1,2) = 200; - }); - - // Case: - // - // 0 0 0 - // 0 0 0 - // - // should give cldtot = 0.0 - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - cldtau(1,1,1) = 0; - cldtau(1,1,2) = 0; - cldtau(1,1,3) = 0; - cldtau(1,2,1) = 0; - cldtau(1,2,2) = 0; - cldtau(1,2,3) = 0; - }); - scream::rrtmgp::compute_cloud_area(ncol, nlay, ngpt, 0, std::numeric_limits::max(), pmid, cldtau, cldtot); - REQUIRE(cldtot.createHostCopy()(1) == 0.0); - - // Case: - // - // 1 1 1 - // 1 1 1 - // - // should give cldtot = 1.0 - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - cldtau(1,1,1) = 1; - cldtau(1,1,2) = 1; - cldtau(1,1,3) = 1; - cldtau(1,2,1) = 1; - cldtau(1,2,2) = 1; - cldtau(1,2,3) = 1; - }); - scream::rrtmgp::compute_cloud_area(ncol, nlay, ngpt, 0, std::numeric_limits::max(), pmid, cldtau, cldtot); - REQUIRE(cldtot.createHostCopy()(1) == 1.0); - - // Case: - // - // 1 1 0 100 - // 0 0 1 200 - // - // should give cldtot = 1.0 - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - cldtau(1,1,1) = 0.1; - cldtau(1,1,2) = 1.5; - cldtau(1,1,3) = 0; - cldtau(1,2,1) = 0; - cldtau(1,2,2) = 0; - cldtau(1,2,3) = 1.0; - }); - scream::rrtmgp::compute_cloud_area(ncol, nlay, ngpt, 0, std::numeric_limits::max(), pmid, cldtau, cldtot); - REQUIRE(cldtot.createHostCopy()(1) == 1.0); - scream::rrtmgp::compute_cloud_area(ncol, nlay, ngpt, 0, 150, pmid, cldtau, cldtot); - REQUIRE(cldtot.createHostCopy()(1) == 2.0 / 3.0); - scream::rrtmgp::compute_cloud_area(ncol, nlay, ngpt, 110, 250, pmid, cldtau, cldtot); - REQUIRE(cldtot.createHostCopy()(1) == 1.0 / 3.0); - - // Case: - // - // 1 0 0 - // 1 0 1 - // - // should give cldtot = 2/3 - yakl::fortran::parallel_for(1, YAKL_LAMBDA(int /* dummy */) { - cldtau(1,1,1) = 1; - cldtau(1,1,2) = 0; - cldtau(1,1,3) = 0; - cldtau(1,2,1) = 1; - cldtau(1,2,2) = 0; - cldtau(1,2,3) = 1; - }); - scream::rrtmgp::compute_cloud_area(ncol, nlay, ngpt, 0, std::numeric_limits::max(), pmid, cldtau, cldtot); - REQUIRE(cldtot.createHostCopy()(1) == 2.0 / 3.0); - scream::rrtmgp::compute_cloud_area(ncol, nlay, ngpt, 0, 100, pmid, cldtau, cldtot); - REQUIRE(cldtot.createHostCopy()(1) == 0.0); - scream::rrtmgp::compute_cloud_area(ncol, nlay, ngpt, 100, 300, pmid, cldtau, cldtot); - REQUIRE(cldtot.createHostCopy()(1) == 2.0 / 3.0); - pmid.deallocate(); - cldtau.deallocate(); - cldtot.deallocate(); - yakl::finalize(); -} - -TEST_CASE("rrtmgp_aerocom_cloudtop") { - // Initialize YAKL - if(!yakl::isInitialized()) { - yakl::init(); - } - // Create dummy data - const int ncol = 1; - const int nlay = 9; - // Set up input fields - auto tmid = real2d("tmid", ncol, nlay); - auto pmid = real2d("pmid", ncol, nlay); - auto p_del = real2d("p_del", ncol, nlay); - auto z_del = real2d("z_del", ncol, nlay); - auto qc = real2d("qc", ncol, nlay); - auto qi = real2d("qi", ncol, nlay); - auto rel = real2d("rel", ncol, nlay); - auto rei = real2d("rei", ncol, nlay); - auto cldfrac_tot = real2d("cldfrac_tot", ncol, nlay); - auto nc = real2d("nc", ncol, nlay); - // Set up output fields - auto tmid_at_cldtop = real1d("tmid_at_cldtop", ncol); - auto pmid_at_cldtop = real1d("pmid_at_cldtop", ncol); - auto cldfrac_ice_at_cldtop = real1d("cldfrac_ice_at_cldtop", ncol); - auto cldfrac_liq_at_cldtop = real1d("cldfrac_liq_at_cldtop", ncol); - auto cldfrac_tot_at_cldtop = real1d("cldfrac_tot_at_cldtop", ncol); - auto cdnc_at_cldtop = real1d("cdnc_at_cldtop", ncol); - auto eff_radius_qc_at_cldtop = real1d("eff_radius_qc_at_cldtop", ncol); - auto eff_radius_qi_at_cldtop = real1d("eff_radius_qi_at_cldtop", ncol); - - // Case 1: if no clouds, everything goes to zero - memset(tmid, 300.0); - memset(pmid, 100.0); - memset(p_del, 10.0); - memset(z_del, 100.0); - memset(qc, 1.0); - memset(qi, 1.0); - memset(cldfrac_tot, 0.0); - memset(nc, 5.0); - memset(rel, 10.0); - memset(rei, 10.0); - // Call the function - scream::rrtmgp::compute_aerocom_cloudtop( - ncol, nlay, tmid, pmid, p_del, z_del, qc, qi, rel, rei, cldfrac_tot, nc, - tmid_at_cldtop, pmid_at_cldtop, cldfrac_ice_at_cldtop, - cldfrac_liq_at_cldtop, cldfrac_tot_at_cldtop, cdnc_at_cldtop, - eff_radius_qc_at_cldtop, eff_radius_qi_at_cldtop); - - // Check the results - REQUIRE(tmid_at_cldtop.createHostCopy()(1) == 0.0); - REQUIRE(pmid_at_cldtop.createHostCopy()(1) == 0.0); - REQUIRE(cldfrac_tot_at_cldtop.createHostCopy()(1) == 0.0); - REQUIRE(cldfrac_liq_at_cldtop.createHostCopy()(1) == 0.0); - REQUIRE(cldfrac_ice_at_cldtop.createHostCopy()(1) == 0.0); - REQUIRE(cdnc_at_cldtop.createHostCopy()(1) == 0.0); - REQUIRE(eff_radius_qc_at_cldtop.createHostCopy()(1) == 0.0); - REQUIRE(eff_radius_qi_at_cldtop.createHostCopy()(1) == 0.0); - - // Case 2: if all clouds, everything goes to 1 * its value - memset(cldfrac_tot, 1.0); - scream::rrtmgp::compute_aerocom_cloudtop( - ncol, nlay, tmid, pmid, p_del, z_del, qc, qi, rel, rei, cldfrac_tot, nc, - tmid_at_cldtop, pmid_at_cldtop, cldfrac_ice_at_cldtop, - cldfrac_liq_at_cldtop, cldfrac_tot_at_cldtop, cdnc_at_cldtop, - eff_radius_qc_at_cldtop, eff_radius_qi_at_cldtop); - - REQUIRE(tmid_at_cldtop.createHostCopy()(1) == 300.0); - REQUIRE(pmid_at_cldtop.createHostCopy()(1) == 100.0); - REQUIRE(cldfrac_tot_at_cldtop.createHostCopy()(1) == 1.0); - REQUIRE(cldfrac_liq_at_cldtop.createHostCopy()(1) == 0.5); - REQUIRE(cldfrac_ice_at_cldtop.createHostCopy()(1) == 0.5); - REQUIRE(cdnc_at_cldtop.createHostCopy()(1) > 0.0); - REQUIRE(eff_radius_qc_at_cldtop.createHostCopy()(1) > 0.0); - REQUIRE(eff_radius_qi_at_cldtop.createHostCopy()(1) > 0.0); - - // Case 3: test max overlap (if contiguous cloudy layers, then max) - memset(cldfrac_tot, 0.0); - yakl::fortran::parallel_for( - 1, YAKL_LAMBDA(int /* dummy */) { - cldfrac_tot(1, 2) = 0.5; - cldfrac_tot(1, 3) = 0.7; - cldfrac_tot(1, 4) = 0.3; - cldfrac_tot(1, 5) = 0.2; - }); - scream::rrtmgp::compute_aerocom_cloudtop( - ncol, nlay, tmid, pmid, p_del, z_del, qc, qi, rel, rei, cldfrac_tot, nc, - tmid_at_cldtop, pmid_at_cldtop, cldfrac_ice_at_cldtop, - cldfrac_liq_at_cldtop, cldfrac_tot_at_cldtop, cdnc_at_cldtop, - eff_radius_qc_at_cldtop, eff_radius_qi_at_cldtop); - - REQUIRE(cldfrac_tot_at_cldtop.createHostCopy()(1) == .7); - - // Case 3xtra: test max overlap - // This case produces >0.7 due to slight enhancement in the presence of a - // local minimum (0.1 is the local minimum between 0.2 and 0.4) - yakl::fortran::parallel_for( - 1, YAKL_LAMBDA(int /* dummy */) { - cldfrac_tot(1, 5) = 0.1; - cldfrac_tot(1, 6) = 0.4; - cldfrac_tot(1, 7) = 0.2; - }); - scream::rrtmgp::compute_aerocom_cloudtop( - ncol, nlay, tmid, pmid, p_del, z_del, qc, qi, rel, rei, cldfrac_tot, nc, - tmid_at_cldtop, pmid_at_cldtop, cldfrac_ice_at_cldtop, - cldfrac_liq_at_cldtop, cldfrac_tot_at_cldtop, cdnc_at_cldtop, - eff_radius_qc_at_cldtop, eff_radius_qi_at_cldtop); - - REQUIRE(cldfrac_tot_at_cldtop.createHostCopy()(1) > .7); - - // Case 4: test random overlap (if non-contiguous cloudy layers, then - // random) - yakl::fortran::parallel_for( - 1, YAKL_LAMBDA(int /* dummy */) { - cldfrac_tot(1, 5) = 0.0; - cldfrac_tot(1, 6) = 0.1; - }); - scream::rrtmgp::compute_aerocom_cloudtop( - ncol, nlay, tmid, pmid, p_del, z_del, qc, qi, rel, rei, cldfrac_tot, nc, - tmid_at_cldtop, pmid_at_cldtop, cldfrac_ice_at_cldtop, - cldfrac_liq_at_cldtop, cldfrac_tot_at_cldtop, cdnc_at_cldtop, - eff_radius_qc_at_cldtop, eff_radius_qi_at_cldtop); - - REQUIRE(cldfrac_tot_at_cldtop.createHostCopy()(1) > - .7); // larger than the max - - // Case 5a: test independence of ice and liquid fractions - yakl::fortran::parallel_for( - 1, YAKL_LAMBDA(int /* dummy */) { - cldfrac_tot(1, 2) = 1.0; - cldfrac_tot(1, 7) = 1.0; - cldfrac_tot(1, 8) = 0.2; - }); - memset(qc, 1.0); - memset(qi, 0.0); - scream::rrtmgp::compute_aerocom_cloudtop( - ncol, nlay, tmid, pmid, p_del, z_del, qc, qi, rel, rei, cldfrac_tot, nc, - tmid_at_cldtop, pmid_at_cldtop, cldfrac_ice_at_cldtop, - cldfrac_liq_at_cldtop, cldfrac_tot_at_cldtop, cdnc_at_cldtop, - eff_radius_qc_at_cldtop, eff_radius_qi_at_cldtop); - - REQUIRE(cldfrac_tot_at_cldtop.createHostCopy()(1) == 1.0); - REQUIRE(cldfrac_liq_at_cldtop.createHostCopy()(1) == 1.0); - REQUIRE(cldfrac_ice_at_cldtop.createHostCopy()(1) == 0.0); - - // Case 5b: test independence of ice and liquid fractions - yakl::fortran::parallel_for( - 1, YAKL_LAMBDA(int /* dummy */) { - cldfrac_tot(1, 2) = 1.0; - cldfrac_tot(1, 7) = 1.0; - cldfrac_tot(1, 8) = 0.2; - }); - memset(qc, 0.0); - memset(qi, 1.0); - scream::rrtmgp::compute_aerocom_cloudtop( - ncol, nlay, tmid, pmid, p_del, z_del, qc, qi, rel, rei, cldfrac_tot, nc, - tmid_at_cldtop, pmid_at_cldtop, cldfrac_ice_at_cldtop, - cldfrac_liq_at_cldtop, cldfrac_tot_at_cldtop, cdnc_at_cldtop, - eff_radius_qc_at_cldtop, eff_radius_qi_at_cldtop); - - REQUIRE(cldfrac_tot_at_cldtop.createHostCopy()(1) == 1.0); - REQUIRE(cldfrac_liq_at_cldtop.createHostCopy()(1) == 0.0); - REQUIRE(cldfrac_ice_at_cldtop.createHostCopy()(1) == 1.0); - - // Case 6: test independence of ice and liquid fractions - // There is NOT complete independence... - // Essentially, higher ice clouds mask lower liquid clouds - // This can be problematic if the ice clouds are thin... - // We will revisit and validate this assumption later - memset(cldfrac_tot, 0.0); - memset(qc, 0.0); - memset(qi, 0.0); - yakl::fortran::parallel_for( - 1, YAKL_LAMBDA(int /* dummy */) { - cldfrac_tot(1, 2) = 0.5; // ice - cldfrac_tot(1, 3) = 0.7; // ice ------> max - cldfrac_tot(1, 4) = 0.3; // ice - // note cldfrac_tot(1, 5) is 0 - cldfrac_tot(1, 6) = 0.2; // liq - cldfrac_tot(1, 7) = 0.5; // liq ------> not max - cldfrac_tot(1, 8) = 0.1; // liq - // note cldfrac_tot(1, 9) is 0 - qi(1, 2) = 100; - qi(1, 3) = 200; - qi(1, 4) = 50; - // note qc(1, 5) is 0 - // note qi(1, 5) is 0 - qc(1, 6) = 20; - qc(1, 7) = 50; - qc(1, 8) = 10; - }); - scream::rrtmgp::compute_aerocom_cloudtop( - ncol, nlay, tmid, pmid, p_del, z_del, qc, qi, rel, rei, cldfrac_tot, nc, - tmid_at_cldtop, pmid_at_cldtop, cldfrac_ice_at_cldtop, - cldfrac_liq_at_cldtop, cldfrac_tot_at_cldtop, cdnc_at_cldtop, - eff_radius_qc_at_cldtop, eff_radius_qi_at_cldtop); - REQUIRE(cldfrac_tot_at_cldtop.createHostCopy()(1) > 0.70); // unaffected - REQUIRE(cldfrac_liq_at_cldtop.createHostCopy()(1) < 0.50); // not max - REQUIRE(cldfrac_ice_at_cldtop.createHostCopy()(1) == 0.7); // max - - // cleanup - tmid.deallocate(); - pmid.deallocate(); - p_del.deallocate(); - z_del.deallocate(); - qc.deallocate(); - qi.deallocate(); - rel.deallocate(); - rei.deallocate(); - cldfrac_tot.deallocate(); - nc.deallocate(); - - tmid_at_cldtop.deallocate(); - pmid_at_cldtop.deallocate(); - cldfrac_ice_at_cldtop.deallocate(); - cldfrac_liq_at_cldtop.deallocate(); - cldfrac_tot_at_cldtop.deallocate(); - cdnc_at_cldtop.deallocate(); - eff_radius_qc_at_cldtop.deallocate(); - eff_radius_qi_at_cldtop.deallocate(); - - yakl::finalize(); -} -#endif - -#ifdef RRTMGP_ENABLE_KOKKOS using interface_t = scream::rrtmgp::rrtmgp_interface<>; using pool_t = interface_t::pool_t; using real1dk = interface_t::view_t; @@ -915,7 +85,7 @@ TEST_CASE("rrtmgp_test_heating_k") { } TEST_CASE("rrtmgp_test_mixing_ratio_to_cloud_mass_k") { - // Initialize YAKL + // Initialize Kokkos scream::init_kls(); pool_t::init(10000); @@ -975,7 +145,7 @@ TEST_CASE("rrtmgp_test_mixing_ratio_to_cloud_mass_k") { } TEST_CASE("rrtmgp_test_limit_to_bounds_k") { - // Initialize YAKL + // Initialize Kokkos scream::init_kls(); pool_t::init(10000); @@ -1076,7 +246,7 @@ TEST_CASE("rrtmgp_test_compute_broadband_surface_flux_k") { ekat::Comm comm(MPI_COMM_WORLD); auto logger = std::make_shared("",LogLevel::info,comm); - // Initialize YAKL + // Initialize Kokkos scream::init_kls(); pool_t::init(10000); @@ -1232,7 +402,7 @@ TEST_CASE("rrtmgp_test_compute_broadband_surface_flux_k") { REQUIRE(std::abs(chc(sfc_flux_dif_vis)(0) - 26.0) < tol); // --------------------------------- - // Finalize YAKL + // Finalize Kokkos logger->info("Free memory...\n"); interface_t::rrtmgp_finalize(); gas_concs.reset(); @@ -1263,7 +433,7 @@ TEST_CASE("rrtmgp_test_radiation_do_k") { } TEST_CASE("rrtmgp_test_check_range_k") { - // Initialize YAKL + // Initialize Kokkos scream::init_kls(); pool_t::init(10000); // Create some dummy data and test with both values inside valid range and outside @@ -1282,7 +452,7 @@ TEST_CASE("rrtmgp_test_check_range_k") { } TEST_CASE("rrtmgp_test_subcol_gen_k") { - // Initialize YAKL + // Initialize Kokkos scream::init_kls(); pool_t::init(10000); // Create dummy data @@ -1352,7 +522,7 @@ TEST_CASE("rrtmgp_test_subcol_gen_k") { } TEST_CASE("rrtmgp_cloud_area_k") { - // Initialize YAKL + // Initialize Kokkos scream::init_kls(); pool_t::init(10000); // Create dummy data @@ -1449,7 +619,7 @@ TEST_CASE("rrtmgp_cloud_area_k") { } TEST_CASE("rrtmgp_aerocom_cloudtop_k") { - // Initialize YAKL + // Initialize Kokkos scream::init_kls(); pool_t::init(10000); @@ -1643,6 +813,5 @@ TEST_CASE("rrtmgp_aerocom_cloudtop_k") { pool_t::finalize(); scream::finalize_kls(); } -#endif } diff --git a/components/eamxx/tests/multi-process/dynamics_physics/homme_shoc_cld_spa_p3_rrtmgp_pg2_dp/homme_shoc_cld_spa_p3_rrtmgp_pg2_dp.cpp b/components/eamxx/tests/multi-process/dynamics_physics/homme_shoc_cld_spa_p3_rrtmgp_pg2_dp/homme_shoc_cld_spa_p3_rrtmgp_pg2_dp.cpp index c1efa80da1fc..541d5dd207c0 100644 --- a/components/eamxx/tests/multi-process/dynamics_physics/homme_shoc_cld_spa_p3_rrtmgp_pg2_dp/homme_shoc_cld_spa_p3_rrtmgp_pg2_dp.cpp +++ b/components/eamxx/tests/multi-process/dynamics_physics/homme_shoc_cld_spa_p3_rrtmgp_pg2_dp/homme_shoc_cld_spa_p3_rrtmgp_pg2_dp.cpp @@ -47,7 +47,7 @@ TEST_CASE("scream_homme_physics", "scream_homme_physics") { AtmosphereDriver ad; // Init, run, and finalize - // NOTE: Kokkos is finalize in ekat_catch_main.cpp, and YAKL is finalized + // NOTE: Kokkos is finalize in ekat_catch_main.cpp, and Kokkos is finalized // during RRTMGPRatiation::finalize_impl, after RRTMGP has deallocated // all its arrays. ad.set_comm(atm_comm); diff --git a/components/eamxx/tests/single-process/rrtmgp/CMakeLists.txt b/components/eamxx/tests/single-process/rrtmgp/CMakeLists.txt index c8b57e98948b..b7953b0e6103 100644 --- a/components/eamxx/tests/single-process/rrtmgp/CMakeLists.txt +++ b/components/eamxx/tests/single-process/rrtmgp/CMakeLists.txt @@ -6,25 +6,20 @@ set (FIXTURES_BASE_NAME ${TEST_BASE_NAME}_generate_output_nc_files) # Ensure test input files are present in the data dir GetInputFile(scream/init/${EAMxx_tests_IC_FILE_72lev}) -set(YAKL_LIB_NAME "") -if (SCREAM_RRTMGP_ENABLE_YAKL) - set(YAKL_LIB_NAME "yakl") -endif() - if (SCREAM_ENABLE_BASELINE_TESTS AND NOT SCREAM_ONLY_GENERATE_BASELINES) # Unit test to compare against raw rrtmgp output configure_file(${CMAKE_CURRENT_SOURCE_DIR}/input_unit.yaml ${CMAKE_CURRENT_BINARY_DIR}/input_unit.yaml) CreateUnitTest(${TEST_BASE_NAME}_unit rrtmgp_standalone_unit.cpp LABELS rrtmgp physics driver - LIBS scream_rrtmgp rrtmgp scream_control ${YAKL_LIB_NAME} diagnostics rrtmgp_test_utils + LIBS scream_rrtmgp rrtmgp scream_control diagnostics rrtmgp_test_utils EXE_ARGS "--args --rrtmgp_inputfile ${SCREAM_DATA_DIR}/init/rrtmgp-allsky.nc --rrtmgp_baseline ${SCREAM_BASELINES_DIR}/data/rrtmgp-allsky-baseline.nc" ) endif() ## Create rrtmgp stand alone executable CreateUnitTestExec(${TEST_BASE_NAME} "rrtmgp_standalone.cpp" - LIBS scream_rrtmgp rrtmgp scream_control ${YAKL_LIB_NAME} diagnostics + LIBS scream_rrtmgp rrtmgp scream_control diagnostics ) # The RRTMGP stand-alone test that runs multi-step diff --git a/components/eamxx/tests/single-process/rrtmgp/rrtmgp_standalone_unit.cpp b/components/eamxx/tests/single-process/rrtmgp/rrtmgp_standalone_unit.cpp index be67bc9bc5b1..bc7c05b517c7 100644 --- a/components/eamxx/tests/single-process/rrtmgp/rrtmgp_standalone_unit.cpp +++ b/components/eamxx/tests/single-process/rrtmgp/rrtmgp_standalone_unit.cpp @@ -19,12 +19,9 @@ #include #include -// RRTMGP and YAKL +// RRTMGP #include #include -#ifdef RRTMGP_ENABLE_YAKL -#include -#endif // System headers #include @@ -46,381 +43,6 @@ using PC = scream::physics::Constants; /* * Run standalone test through SCREAM driver this time */ -#ifdef RRTMGP_ENABLE_YAKL -TEST_CASE("rrtmgp_scream_standalone", "") { - // Get baseline name (needs to be passed as an arg) - std::string inputfile = ekat::TestSession::get().params.at("inputfile"); - std::string baseline = ekat::TestSession::get().params.at("baseline"); - - // Check if files exists - REQUIRE(rrtmgpTest::file_exists(inputfile.c_str())); - REQUIRE(rrtmgpTest::file_exists(baseline.c_str())); - - // Initialize yakl - if(!yakl::isInitialized()) { yakl::init(); } - - // Read reference fluxes from baseline file - real2d sw_flux_up_ref; - real2d sw_flux_dn_ref; - real2d sw_flux_dn_dir_ref; - real2d lw_flux_up_ref; - real2d lw_flux_dn_ref; - rrtmgpTest::read_fluxes(baseline, sw_flux_up_ref, sw_flux_dn_ref, sw_flux_dn_dir_ref, lw_flux_up_ref, lw_flux_dn_ref ); - - // Load ad parameter list - std::string fname = "input_unit.yaml"; - ekat::ParameterList ad_params("Atmosphere Driver"); - parse_yaml_file(fname,ad_params); - // Create a MPI communicator - ekat::Comm atm_comm (MPI_COMM_WORLD); - - // Need to register products in the factory *before* we create any atm process or grids manager. - register_physics (); - register_mesh_free_grids_manager(); - - // Create the driver - AtmosphereDriver ad; - - // Dummy timestamp - util::TimeStamp time ({2000,1,1},{0,0,0}); - - // Initialize the driver - ad.initialize(atm_comm, ad_params, time); - - /* - * Setup the dummy problem and overwrite default initial conditions - */ - - // Get dimension sizes from the field manager - const auto& grid = ad.get_grids_manager()->get_grid("point_grid"); - const auto& field_mgr = *ad.get_field_mgr(); - int ncol = grid->get_num_local_dofs(); - int nlay = grid->get_num_vertical_levels(); - - // In this test, we need to hack lat/lon. But the fields we get - // from the grid are read-only. Therefore, hack a bit, and cast - // away constness. It's bad, but it's only for this unit test - auto clat = grid->get_geometry_data("lat").get_view(); - auto clon = grid->get_geometry_data("lon").get_view(); - auto lat = const_cast(clat.data()); - auto lon = const_cast(clon.data()); - - // Get number of shortwave bands and number of gases from RRTMGP - int ngas = 8; // TODO: get this intelligently - - // Make sure we have the right dimension sizes - REQUIRE(nlay == static_cast(sw_flux_up_ref.dimension[1])-1); - - // Create yakl arrays to store the input data - auto p_lay = real2d("p_lay", ncol, nlay); - auto t_lay = real2d("t_lay", ncol, nlay); - auto p_del = real2d("p_del", ncol, nlay); - auto p_lev = real2d("p_lev", ncol, nlay+1); - auto t_lev = real2d("t_lev", ncol, nlay+1); - auto sfc_alb_dir_vis = real1d("sfc_alb_dir_vis", ncol); - auto sfc_alb_dir_nir = real1d("sfc_alb_dir_nir", ncol); - auto sfc_alb_dif_vis = real1d("sfc_alb_dif_vis", ncol); - auto sfc_alb_dif_nir = real1d("sfc_alb_dif_nir", ncol); - auto lwp = real2d("lwp", ncol, nlay); - auto iwp = real2d("iwp", ncol, nlay); - auto rel = real2d("rel", ncol, nlay); - auto rei = real2d("rei", ncol, nlay); - auto cld = real2d("cld", ncol, nlay); - auto mu0 = real1d("mu0", ncol); - auto gas_vmr = real3d("gas_vmr", ncol, nlay, ngas); - - // Setup dummy problem; need to use tmp arrays with ncol_all size - auto ncol_all = grid->get_num_global_dofs(); - auto p_lay_all = real2d("p_lay", ncol_all, nlay); - auto t_lay_all = real2d("t_lay", ncol_all, nlay); - auto p_lev_all = real2d("p_lev", ncol_all, nlay+1); - auto t_lev_all = real2d("t_lev", ncol_all, nlay+1); - auto sfc_alb_dir_vis_all = real1d("sfc_alb_dir_vis", ncol_all); - auto sfc_alb_dir_nir_all = real1d("sfc_alb_dir_nir", ncol_all); - auto sfc_alb_dif_vis_all = real1d("sfc_alb_dif_vis", ncol_all); - auto sfc_alb_dif_nir_all = real1d("sfc_alb_dif_nir", ncol_all); - auto lwp_all = real2d("lwp", ncol_all, nlay); - auto iwp_all = real2d("iwp", ncol_all, nlay); - auto rel_all = real2d("rel", ncol_all, nlay); - auto rei_all = real2d("rei", ncol_all, nlay); - auto cld_all = real2d("cld", ncol_all, nlay); - auto mu0_all = real1d("mu0", ncol_all); - // Read in dummy Garand atmosphere; if this were an actual model simulation, - // these would be passed as inputs to the driver - // NOTE: set ncol to size of col_flx dimension in the input file. This is so - // that we can compare to the reference data provided in that file. Note that - // this will copy the first column of the input data (the first profile) ncol - // times. We will then fill some fraction of these columns with clouds for - // the test problem. - GasConcs gas_concs; - real2d col_dry; - read_atmos(inputfile, p_lay_all, t_lay_all, p_lev_all, t_lev_all, gas_concs, col_dry, ncol_all); - // Setup dummy problem; need to use tmp arrays with ncol_all size - rrtmgpTest::dummy_atmos( - inputfile, ncol_all, p_lay_all, t_lay_all, - sfc_alb_dir_vis_all, sfc_alb_dir_nir_all, - sfc_alb_dif_vis_all, sfc_alb_dif_nir_all, - mu0_all, - lwp_all, iwp_all, rel_all, rei_all, cld_all - ); - // Populate our local input arrays with the proper columns, based on our rank - int irank = atm_comm.rank(); - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<1>(ncol), YAKL_LAMBDA(int icol) { - auto icol_all = ncol * irank + icol; - sfc_alb_dir_vis(icol) = sfc_alb_dir_vis_all(icol_all); - sfc_alb_dir_nir(icol) = sfc_alb_dir_nir_all(icol_all); - sfc_alb_dif_vis(icol) = sfc_alb_dif_vis_all(icol_all); - sfc_alb_dif_nir(icol) = sfc_alb_dif_nir_all(icol_all); - mu0(icol) = mu0_all(icol_all); - }); - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<2>(nlay,ncol), YAKL_LAMBDA(int ilay, int icol) { - auto icol_all = ncol * irank + icol; - p_lay(icol,ilay) = p_lay_all(icol_all,ilay); - t_lay(icol,ilay) = t_lay_all(icol_all,ilay); - lwp(icol,ilay) = lwp_all(icol_all,ilay); - iwp(icol,ilay) = iwp_all(icol_all,ilay); - rel(icol,ilay) = rel_all(icol_all,ilay); - rei(icol,ilay) = rei_all(icol_all,ilay); - cld(icol,ilay) = cld_all(icol_all,ilay); - }); - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<2>(nlay+1,ncol), YAKL_LAMBDA(int ilay, int icol) { - auto icol_all = ncol * irank + icol; - p_lev(icol,ilay) = p_lev_all(icol_all,ilay); - t_lev(icol,ilay) = t_lev_all(icol_all,ilay); - }); - // Free temporary variables - p_lay_all.deallocate(); - t_lay_all.deallocate(); - p_lev_all.deallocate(); - t_lev_all.deallocate(); - sfc_alb_dir_vis_all.deallocate(); - sfc_alb_dir_nir_all.deallocate(); - sfc_alb_dif_vis_all.deallocate(); - sfc_alb_dif_nir_all.deallocate(); - lwp_all.deallocate(); - iwp_all.deallocate(); - rel_all.deallocate(); - rei_all.deallocate(); - cld_all.deallocate(); - mu0_all.deallocate(); - - // Need to calculate a dummy pseudo_density for our test problem - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<2>(nlay,ncol), YAKL_LAMBDA(int ilay, int icol) { - p_del(icol,ilay) = abs(p_lev(icol,ilay+1) - p_lev(icol,ilay)); - }); - - // We do not want to pass lwp and iwp through the FM, so back out qc and qi for this test - // NOTE: test problem provides lwp/iwp in g/m2, not kg/m2! Factor of 1e3 here! - auto qc = real2d("qc", ncol, nlay); - auto nc = real2d("nc", ncol, nlay); - auto qi = real2d("qi", ncol, nlay); - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<2>(nlay, ncol), YAKL_LAMBDA(int ilay, int icol) { - qc(icol,ilay) = 1e-3 * lwp(icol,ilay) * cld(icol,ilay) * PC::gravit / p_del(icol,ilay); - qi(icol,ilay) = 1e-3 * iwp(icol,ilay) * cld(icol,ilay) * PC::gravit / p_del(icol,ilay); - }); - - // Copy gases from gas_concs to gas_vmr array - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<3>(ncol,nlay,ngas), YAKL_LAMBDA(int icol, int ilay, int igas) { - auto icol_all = ncol * irank + icol; - gas_vmr(icol,ilay,igas) = gas_concs.concs(icol_all,ilay,igas); - }); - gas_concs.reset(); - - // Before running, make a copy of T_mid so we can see changes - auto T_mid0 = real2d("T_mid0", ncol, nlay); - t_lay.deep_copy_to(T_mid0); - - // Grab views from field manager and copy in values from yakl arrays. Making - // copies is necessary since the yakl::Array take in data arranged with ncol - // as the fastest index, but the field manager expects the 2nd dimension as - // the fastest index. - auto d_pmid = field_mgr.get_field("p_mid").get_view(); - auto d_tmid = field_mgr.get_field("T_mid").get_view(); - auto d_pint = field_mgr.get_field("p_int").get_view(); - auto d_pdel = field_mgr.get_field("pseudo_density").get_view(); - auto d_sfc_alb_dir_vis = field_mgr.get_field("sfc_alb_dir_vis").get_view(); - auto d_sfc_alb_dir_nir = field_mgr.get_field("sfc_alb_dir_nir").get_view(); - auto d_sfc_alb_dif_vis = field_mgr.get_field("sfc_alb_dif_vis").get_view(); - auto d_sfc_alb_dif_nir = field_mgr.get_field("sfc_alb_dif_nir").get_view(); - auto d_surf_lw_flux_up = field_mgr.get_field("surf_lw_flux_up").get_view(); - auto d_qc = field_mgr.get_field("qc").get_view(); - auto d_nc = field_mgr.get_field("nc").get_view(); - auto d_qi = field_mgr.get_field("qi").get_view(); - auto d_rel = field_mgr.get_field("eff_radius_qc").get_view(); - auto d_rei = field_mgr.get_field("eff_radius_qi").get_view(); - auto d_cld = field_mgr.get_field("cldfrac_tot").get_view(); - - auto d_qv = field_mgr.get_field("qv").get_view(); - auto d_co2 = field_mgr.get_field("co2_volume_mix_ratio").get_view(); - auto d_o3 = field_mgr.get_field("o3_volume_mix_ratio").get_view(); - auto d_n2o = field_mgr.get_field("n2o_volume_mix_ratio").get_view(); - auto d_co = field_mgr.get_field("co_volume_mix_ratio").get_view(); - auto d_ch4 = field_mgr.get_field("ch4_volume_mix_ratio").get_view(); - auto d_o2 = field_mgr.get_field("o2_volume_mix_ratio").get_view(); - auto d_n2 = field_mgr.get_field("n2_volume_mix_ratio").get_view(); - - // Gather molecular weights of all the active gases in the test for conversion - // to mass-mixing-ratio. - { - const auto policy = ekat::ExeSpaceUtils::get_default_team_policy(ncol, nlay); - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const MemberType& team) { - const int i = team.league_rank(); - - // Set lat and lon to single value for just this test: - // Note, these values will ensure that the cosine zenith - // angle will end up matching the constant value meant for - // the test, which is 0.86 - lat[i] = 5.224000000000; - lon[i] = 167.282000000000; - - d_sfc_alb_dir_vis(i) = sfc_alb_dir_vis(i+1); - d_sfc_alb_dir_nir(i) = sfc_alb_dir_nir(i+1); - d_sfc_alb_dif_vis(i) = sfc_alb_dif_vis(i+1); - d_sfc_alb_dif_nir(i) = sfc_alb_dif_nir(i+1); - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay), [&] (const int& k) { - d_pmid(i,k) = p_lay(i+1,k+1); - d_tmid(i,k) = t_lay(i+1,k+1); - d_pdel(i,k) = p_del(i+1,k+1); - d_qc(i,k) = qc(i+1,k+1); - d_nc(i,k) = nc(i+1,k+1); - d_qi(i,k) = qi(i+1,k+1); - d_rel(i,k) = rel(i+1,k+1); - d_rei(i,k) = rei(i+1,k+1); - d_cld(i,k) = cld(i+1,k+1); - d_pint(i,k) = p_lev(i+1,k+1); - // qv specified as a wet mixing ratio - Real qv_dry = gas_vmr(i+1,k+1,1)*PC::ep_2; - Real qv_wet = qv_dry/(1.0+qv_dry); - d_qv(i,k) = qv_wet; - // rest of active gases are specified as volume mixing ratios - d_co2(i,k) = gas_vmr(i+1,k+1,2); - d_o3(i,k) = gas_vmr(i+1,k+1,3); - d_n2o(i,k) = gas_vmr(i+1,k+1,4); - d_co(i,k) = gas_vmr(i+1,k+1,5); - d_ch4(i,k) = gas_vmr(i+1,k+1,6); - d_o2(i,k) = gas_vmr(i+1,k+1,7); - d_n2(i,k) = gas_vmr(i+1,k+1,8); - }); - - d_pint(i,nlay) = p_lev(i+1,nlay+1); - - // Compute surface flux from surface temperature - auto ibot = (p_lay(1,1) > p_lay(1,nlay)) ? 1 : nlay+1; - d_surf_lw_flux_up(i) = PC::stebol * pow(t_lev(i+1,ibot), 4.0); - }); - } - Kokkos::fence(); - - // Run driver - ad.run(300); - - // Check values; The correct values have been stored in the field manager, we need to - // copy back to YAKL::Array. - auto d_sw_flux_up = field_mgr.get_field("sw_flux_up").get_view(); - auto d_sw_flux_dn = field_mgr.get_field("sw_flux_dn").get_view(); - auto d_sw_flux_dn_dir = field_mgr.get_field("sw_flux_dn_dir").get_view(); - auto d_lw_flux_up = field_mgr.get_field("lw_flux_up").get_view(); - auto d_lw_flux_dn = field_mgr.get_field("lw_flux_dn").get_view(); - auto sw_flux_up_test = real2d("sw_flux_up_test", ncol, nlay+1); - auto sw_flux_dn_test = real2d("sw_flux_dn_test", ncol, nlay+1); - auto sw_flux_dn_dir_test = real2d("sw_flux_dn_dir_test", ncol, nlay+1); - auto lw_flux_up_test = real2d("lw_flux_up_test", ncol, nlay+1); - auto lw_flux_dn_test = real2d("lw_flux_dn_test", ncol, nlay+1); - { - const auto policy = ekat::ExeSpaceUtils::get_default_team_policy(ncol, nlay); - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(const MemberType& team) { - const int i = team.league_rank(); - - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay+1), [&] (const int& k) { - if (k < nlay) t_lay(i+1,k+1) = d_tmid(i,k); - - sw_flux_up_test(i+1,k+1) = d_sw_flux_up(i,k); - sw_flux_dn_test(i+1,k+1) = d_sw_flux_dn(i,k); - sw_flux_dn_dir_test(i+1,k+1) = d_sw_flux_dn_dir(i,k); - lw_flux_up_test(i+1,k+1) = d_lw_flux_up(i,k); - lw_flux_dn_test(i+1,k+1) = d_lw_flux_dn(i,k); - }); - }); - } - Kokkos::fence(); - - // Dumb check to verify that we did indeed update temperature - REQUIRE(t_lay.createHostCopy()(1,1) != T_mid0.createHostCopy()(1,1)); - T_mid0.deallocate(); - - // Make sure fluxes from field manager that were calculated in AD call of RRTMGP match reference fluxes from input file - // We use all_close here instead of all_equals because we are only able - // to approximate the solar zenith angle used in the RRTMGP clear-sky - // test problem with our trial and error lat/lon values, so fluxes will - // be slightly off. We just verify that they are all "close" here, within - // some tolerance. Computation of level temperatures from midpoints is - // also unable to exactly reproduce the values in the test problem, so - // computed fluxes will be further off from the reference calculation. - // - // We need to create local copies with only the columns specific to our rank in case we have split columns over multiple ranks - auto sw_flux_up_loc = real2d("sw_flux_up_loc" , ncol, nlay+1); - auto sw_flux_dn_loc = real2d("sw_flux_dn_loc" , ncol, nlay+1); - auto sw_flux_dn_dir_loc = real2d("sw_flux_dn_dir_loc", ncol, nlay+1); - auto lw_flux_up_loc = real2d("lw_flux_up_loc" , ncol, nlay+1); - auto lw_flux_dn_loc = real2d("lw_flux_dn_loc" , ncol, nlay+1); - yakl::fortran::parallel_for(yakl::fortran::SimpleBounds<2>(nlay+1,ncol), YAKL_LAMBDA(int ilay, int icol) { - auto icol_all = ncol * irank + icol; - sw_flux_up_loc(icol,ilay) = sw_flux_up_ref(icol_all,ilay); - sw_flux_dn_loc(icol,ilay) = sw_flux_dn_ref(icol_all,ilay); - sw_flux_dn_dir_loc(icol,ilay) = sw_flux_dn_dir_ref(icol_all,ilay); - lw_flux_up_loc(icol,ilay) = lw_flux_up_ref(icol_all,ilay); - lw_flux_dn_loc(icol,ilay) = lw_flux_dn_ref(icol_all,ilay); - }); - REQUIRE(rrtmgpTest::all_close(sw_flux_up_loc , sw_flux_up_test , 1.0)); - REQUIRE(rrtmgpTest::all_close(sw_flux_dn_loc , sw_flux_dn_test , 1.0)); - REQUIRE(rrtmgpTest::all_close(sw_flux_dn_dir_loc, sw_flux_dn_dir_test, 1.0)); - REQUIRE(rrtmgpTest::all_close(lw_flux_up_loc , lw_flux_up_test , 1.0)); - REQUIRE(rrtmgpTest::all_close(lw_flux_dn_loc , lw_flux_dn_test , 1.0)); - - // Deallocate YAKL arrays - p_lay.deallocate(); - t_lay.deallocate(); - p_del.deallocate(); - p_lev.deallocate(); - t_lev.deallocate(); - sfc_alb_dir_vis.deallocate(); - sfc_alb_dir_nir.deallocate(); - sfc_alb_dif_vis.deallocate(); - sfc_alb_dif_nir.deallocate(); - lwp.deallocate(); - iwp.deallocate(); - rel.deallocate(); - rei.deallocate(); - cld.deallocate(); - qc.deallocate(); - nc.deallocate(); - qi.deallocate(); - mu0.deallocate(); - gas_vmr.deallocate(); - sw_flux_up_test.deallocate(); - sw_flux_dn_test.deallocate(); - sw_flux_dn_dir_test.deallocate(); - lw_flux_up_test.deallocate(); - lw_flux_dn_test.deallocate(); - sw_flux_up_ref.deallocate(); - sw_flux_dn_ref.deallocate(); - sw_flux_dn_dir_ref.deallocate(); - lw_flux_up_ref.deallocate(); - lw_flux_dn_ref.deallocate(); - sw_flux_up_loc.deallocate(); - sw_flux_dn_loc.deallocate(); - sw_flux_dn_dir_loc.deallocate(); - lw_flux_up_loc.deallocate(); - lw_flux_dn_loc.deallocate(); - col_dry.deallocate(); - - // Finalize the driver. YAKL will be finalized inside - // RRTMGPRadiation::finalize_impl after RRTMGP has had the - // opportunity to deallocate all it's arrays. - ad.finalize(); -} -#endif -#ifdef RRTMGP_ENABLE_KOKKOS TEST_CASE("rrtmgp_scream_standalone_k", "") { #ifdef RRTMGP_LAYOUT_LEFT using layout_t = Kokkos::LayoutLeft; @@ -442,7 +64,7 @@ TEST_CASE("rrtmgp_scream_standalone_k", "") { REQUIRE(rrtmgpTest::file_exists(inputfile.c_str())); REQUIRE(rrtmgpTest::file_exists(baseline.c_str())); - // Initialize yakl + // Initialize kokkos scream::init_kls(); // Read reference fluxes from baseline file @@ -497,7 +119,7 @@ TEST_CASE("rrtmgp_scream_standalone_k", "") { // Make sure we have the right dimension sizes REQUIRE(nlay == static_cast(sw_flux_up_ref.extent(1))-1); - // Create yakl arrays to store the input data + // Create kokkos arrays to store the input data auto p_lay = real2dk("p_lay", ncol, nlay); auto t_lay = real2dk("t_lay", ncol, nlay); auto p_del = real2dk("p_del", ncol, nlay); @@ -601,8 +223,8 @@ TEST_CASE("rrtmgp_scream_standalone_k", "") { auto T_mid0 = real2dk("T_mid0", ncol, nlay); Kokkos::deep_copy(T_mid0, t_lay); - // Grab views from field manager and copy in values from yakl arrays. Making - // copies is necessary since the yakl::Array take in data arranged with ncol + // Grab views from field manager and copy in values from kokkos arrays. Making + // copies is necessary since the kokkos::View take in data arranged with ncol // as the fastest index, but the field manager expects the 2nd dimension as // the fastest index. auto d_pmid = field_mgr.get_field("p_mid").get_view(); @@ -686,7 +308,7 @@ TEST_CASE("rrtmgp_scream_standalone_k", "") { ad.run(300); // Check values; The correct values have been stored in the field manager, we need to - // copy back to YAKL::Array. + // copy back to Kokkos::View. auto d_sw_flux_up = field_mgr.get_field("sw_flux_up").get_view(); auto d_sw_flux_dn = field_mgr.get_field("sw_flux_dn").get_view(); auto d_sw_flux_dn_dir = field_mgr.get_field("sw_flux_dn_dir").get_view(); @@ -748,11 +370,10 @@ TEST_CASE("rrtmgp_scream_standalone_k", "") { REQUIRE(utils_t::all_close(lw_flux_up_loc , lw_flux_up_test , 1.0)); REQUIRE(utils_t::all_close(lw_flux_dn_loc , lw_flux_dn_test , 1.0)); - // Finalize the driver. YAKL will be finalized inside + // Finalize the driver. Kokkos will be finalized inside // RRTMGPRadiation::finalize_impl after RRTMGP has had the // opportunity to deallocate all it's arrays. ad.finalize(); } -#endif } From 612f34b580ea8f00d7ed61a73f64bc86196eb7e0 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Mon, 12 May 2025 10:06:48 -0600 Subject: [PATCH 2/4] Remove legacy YAKL build settings (CUDA_BUILD etc) --- components/eam/src/physics/rrtmgp/external | 2 +- components/eamxx/CMakeLists.txt | 1 + components/eamxx/src/physics/shoc/CMakeLists.txt | 2 +- components/homme/CMakeLists.txt | 6 +++--- components/homme/cmake/HommeMacros.cmake | 2 +- components/homme/cmake/SetCompilerFlags.cmake | 1 - components/homme/cmake/machineFiles/aurora-aot.cmake | 1 - components/homme/cmake/machineFiles/aurora-jit.cmake | 1 - components/homme/cmake/machineFiles/crusher-gpumpi.cmake | 4 ---- components/homme/cmake/machineFiles/frontier-bfb.cmake | 2 -- components/homme/cmake/machineFiles/perlmutter-gnu.cmake | 1 - components/homme/cmake/machineFiles/polaris-a100.sh | 2 -- components/homme/cmake/machineFiles/spot-aot-AB2.cmake | 1 - components/homme/cmake/machineFiles/summit-p9.cmake | 2 +- components/homme/test_execs/share_kokkos_ut/CMakeLists.txt | 6 +++--- 15 files changed, 11 insertions(+), 23 deletions(-) diff --git a/components/eam/src/physics/rrtmgp/external b/components/eam/src/physics/rrtmgp/external index 7a96f68db5b4..a4f340db2d54 160000 --- a/components/eam/src/physics/rrtmgp/external +++ b/components/eam/src/physics/rrtmgp/external @@ -1 +1 @@ -Subproject commit 7a96f68db5b4cba11e229968f8c17f0d74913db9 +Subproject commit a4f340db2d545385a57c19e9c4d3ec0c399dc7fb diff --git a/components/eamxx/CMakeLists.txt b/components/eamxx/CMakeLists.txt index 388f7ba6d690..9bc9366610f5 100644 --- a/components/eamxx/CMakeLists.txt +++ b/components/eamxx/CMakeLists.txt @@ -219,6 +219,7 @@ endif() # and then adding to eamxx_config.h: # #cmakedefine RRTMGP_EXPENSIVE_CHECKS option (SCREAM_RRTMGP_DEBUG "Turn on extra debug checks in RRTMGP" ${SCREAM_DEBUG}) +set(SCREAM_RRTMGP_ENABLE_KOKKOS TRUE) set(SCREAM_DOUBLE_PRECISION TRUE CACHE BOOL "Set to double precision (default True)") diff --git a/components/eamxx/src/physics/shoc/CMakeLists.txt b/components/eamxx/src/physics/shoc/CMakeLists.txt index f6106bb01deb..277b2a2ee3bd 100644 --- a/components/eamxx/src/physics/shoc/CMakeLists.txt +++ b/components/eamxx/src/physics/shoc/CMakeLists.txt @@ -86,7 +86,7 @@ if (NOT SCREAM_DEBUG) endif() endif() -if (HIP_BUILD) +if (Kokkos_ENABLE_HIP) #this is needed for crusher even with small kernels set_source_files_properties(shoc_diag_second_shoc_moments_disp.cpp PROPERTIES COMPILE_FLAGS -O1) endif() diff --git a/components/homme/CMakeLists.txt b/components/homme/CMakeLists.txt index 4bbfb3d73acf..25d51398e429 100644 --- a/components/homme/CMakeLists.txt +++ b/components/homme/CMakeLists.txt @@ -314,15 +314,15 @@ MESSAGE(STATUS "CXX Flags = ${CMAKE_CXX_FLAGS}") MESSAGE(STATUS "Linker Flags = ${CMAKE_EXE_LINKER_FLAGS}") SET (HOMMEXX_ENABLE_GPU FALSE) -SET (HOMMEXX_ENABLE_GPU_F90 FALSE) +SET (HOMMEXX_ENABLE_GPU_F90 FALSE) IF (HOMME_USE_KOKKOS) - IF (CUDA_BUILD OR HIP_BUILD OR SYCL_BUILD) + IF (Kokkos_ENABLE_CUDA OR Kokkos_ENABLE_HIP OR Kokkos_ENABLE_SYCL) SET (DEFAULT_VECTOR_SIZE 1) SET (HOMMEXX_ENABLE_GPU TRUE) SET (HOMMEXX_ENABLE_GPU_F90 TRUE) - IF (SYCL_BUILD) + IF (Kokkos_ENABLE_SYCL) SET (DISABLE_TIMERS_IN_FIRST_STEP TRUE) ENDIF() ELSE () diff --git a/components/homme/cmake/HommeMacros.cmake b/components/homme/cmake/HommeMacros.cmake index 486b30b28334..5ead59d677cf 100644 --- a/components/homme/cmake/HommeMacros.cmake +++ b/components/homme/cmake/HommeMacros.cmake @@ -114,7 +114,7 @@ macro(createTestExec execName execType macroNP macroNC ADD_EXECUTABLE(${execName} ${EXEC_SOURCES}) # For SYCL builds it is suggested to use CXX linker with `-fortlib` # for mixed-language setups - IF(SYCL_BUILD) + IF(Kokkos_ENABLE_SYCL) SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE CXX) ELSE() SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE Fortran) diff --git a/components/homme/cmake/SetCompilerFlags.cmake b/components/homme/cmake/SetCompilerFlags.cmake index b1469d455c9b..3d7da0498ba7 100644 --- a/components/homme/cmake/SetCompilerFlags.cmake +++ b/components/homme/cmake/SetCompilerFlags.cmake @@ -83,7 +83,6 @@ IF (${HOMME_USE_CXX}) STRING (FIND "${CXX_COMPILER_VERSION_OUT}" "nvcc" pos) IF (${pos} GREATER -1) SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --expt-extended-lambda") - SET (CUDA_BUILD TRUE) MESSAGE (STATUS "Found CUDA, with nvcc as C++ backend compiler. Great.") ELSE () MESSAGE ("\n ********************************** WARNING ********************************** ") diff --git a/components/homme/cmake/machineFiles/aurora-aot.cmake b/components/homme/cmake/machineFiles/aurora-aot.cmake index e878d3e1250b..2a9499ab3edc 100644 --- a/components/homme/cmake/machineFiles/aurora-aot.cmake +++ b/components/homme/cmake/machineFiles/aurora-aot.cmake @@ -28,7 +28,6 @@ SET(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "") SET(USE_TRILINOS OFF CACHE BOOL "") -SET(SYCL_BUILD TRUE CACHE BOOL "") SET(HOMME_ENABLE_COMPOSE TRUE CACHE BOOL "") SET(Kokkos_ARCH_SPR ON CACHE BOOL "") diff --git a/components/homme/cmake/machineFiles/aurora-jit.cmake b/components/homme/cmake/machineFiles/aurora-jit.cmake index 8c35930fdf66..c8658cde323a 100644 --- a/components/homme/cmake/machineFiles/aurora-jit.cmake +++ b/components/homme/cmake/machineFiles/aurora-jit.cmake @@ -25,7 +25,6 @@ set(Kokkos_ROOT $ENV{KOKKOS_HOME} CACHE STRING "") SET(USE_TRILINOS OFF CACHE BOOL "") -SET(SYCL_BUILD TRUE CACHE BOOL "") SET(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "") SET(CMAKE_CXX_STANDARD 17) diff --git a/components/homme/cmake/machineFiles/crusher-gpumpi.cmake b/components/homme/cmake/machineFiles/crusher-gpumpi.cmake index 0b054b6dd792..32acab56db59 100644 --- a/components/homme/cmake/machineFiles/crusher-gpumpi.cmake +++ b/components/homme/cmake/machineFiles/crusher-gpumpi.cmake @@ -27,10 +27,6 @@ SET(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "") SET(USE_TRILINOS OFF CACHE BOOL "") -#CUDA_BUILD is set in SetCompilersFlags, after findPackage(Cuda) -#i haven't extend it to hip, set it here instead -SET(HIP_BUILD TRUE CACHE BOOL "") - #uncomment this if using internal kokkos build #SET(Kokkos_ENABLE_SERIAL ON CACHE BOOL "") ####SET(CMAKE_CXX_STANDARD "14" CACHE STRING "") diff --git a/components/homme/cmake/machineFiles/frontier-bfb.cmake b/components/homme/cmake/machineFiles/frontier-bfb.cmake index 15f97742dd7b..921ce70f3342 100644 --- a/components/homme/cmake/machineFiles/frontier-bfb.cmake +++ b/components/homme/cmake/machineFiles/frontier-bfb.cmake @@ -21,8 +21,6 @@ SET (HOMMEXX_BFB_TESTING TRUE CACHE BOOL "") SET(USE_TRILINOS OFF CACHE BOOL "") -SET(HIP_BUILD TRUE CACHE BOOL "") - SET(Kokkos_ENABLE_SERIAL ON CACHE BOOL "") #SET(Kokkos_ENABLE_DEBUG OFF CACHE BOOL "") SET(Kokkos_ARCH_VEGA90A ON CACHE BOOL "") diff --git a/components/homme/cmake/machineFiles/perlmutter-gnu.cmake b/components/homme/cmake/machineFiles/perlmutter-gnu.cmake index a27f83900c1f..0bfe3f74253f 100644 --- a/components/homme/cmake/machineFiles/perlmutter-gnu.cmake +++ b/components/homme/cmake/machineFiles/perlmutter-gnu.cmake @@ -46,7 +46,6 @@ SET(CMAKE_CXX_COMPILER "CC" CACHE STRING "") # Note: No longer need to set MPICH_CXX env variable and perhaps # NVCC_WRAPPER_DEFAULT_COMPILER. Ignore the warning about nvcc_wrapper during # configuration. -SET(CUDA_BUILD TRUE CACHE STRING "") SET(CXXLIB_SUPPORTED_CACHE FALSE CACHE BOOL "") diff --git a/components/homme/cmake/machineFiles/polaris-a100.sh b/components/homme/cmake/machineFiles/polaris-a100.sh index 2b63c61a55e7..127128fbeec3 100644 --- a/components/homme/cmake/machineFiles/polaris-a100.sh +++ b/components/homme/cmake/machineFiles/polaris-a100.sh @@ -30,8 +30,6 @@ SET(USE_QUEUING FALSE CACHE BOOL "") SET(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "") -SET(CUDA_BUILD TRUE CACHE BOOL "") - #SET(HOMMEXX_BFB_TESTING TRUE CACHE BOOL "") SET(USE_TRILINOS OFF CACHE BOOL "") diff --git a/components/homme/cmake/machineFiles/spot-aot-AB2.cmake b/components/homme/cmake/machineFiles/spot-aot-AB2.cmake index 23fad2361ccf..2ab993b3c80e 100644 --- a/components/homme/cmake/machineFiles/spot-aot-AB2.cmake +++ b/components/homme/cmake/machineFiles/spot-aot-AB2.cmake @@ -30,7 +30,6 @@ SET (NetCDF_C_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12 SET(USE_TRILINOS OFF CACHE BOOL "") -SET(SYCL_BUILD TRUE CACHE BOOL "") SET(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "") #SET(CMAKE_CXX_STANDARD 17) diff --git a/components/homme/cmake/machineFiles/summit-p9.cmake b/components/homme/cmake/machineFiles/summit-p9.cmake index 9136dc0bfbdc..6ad3fec33c40 100644 --- a/components/homme/cmake/machineFiles/summit-p9.cmake +++ b/components/homme/cmake/machineFiles/summit-p9.cmake @@ -6,7 +6,7 @@ #SET (HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "") -#modules, note no cuda, otherwise CUDA_BUILD=true +#modules, note no cuda #module load cmake/3.6.1 gcc/6.4.0 netlib-lapack/3.6.1 #module load netcdf/4.6.1 netcdf-fortran/4.4.4 #module load hdf5/1.10.3 diff --git a/components/homme/test_execs/share_kokkos_ut/CMakeLists.txt b/components/homme/test_execs/share_kokkos_ut/CMakeLists.txt index bc788462ce6e..8719b5fe8b14 100644 --- a/components/homme/test_execs/share_kokkos_ut/CMakeLists.txt +++ b/components/homme/test_execs/share_kokkos_ut/CMakeLists.txt @@ -7,11 +7,11 @@ SET(UTILS_TIMING_BIN_DIR ${HOMME_BINARY_DIR}/utils/cime/CIME/non_py/src/timing) SET(UTILS_TIMING_DIRS ${UTILS_TIMING_SRC_DIR} ${UTILS_TIMING_BIN_DIR}) # Place common CPP definitions here. -# Note: need CUDA_BUILD and HOMMEXX_BFB_TESTING here, since the share +# Note: HOMMEXX_BFB_TESTING here, since the share # unit tests do not include a config.h file SET (COMMON_DEFINITIONS NP=4 NC=4) -IF (CUDA_BUILD OR HIP_BUILD OR SYCL_BUILD) - SET(COMMON_DEFINITIONS ${COMMON_DEFINITIONS} HOMMEXX_ENABLE_GPU_F90) +IF (HOMMEXX_ENABLE_GPU_F90) + SET(COMMON_DEFINITIONS ${COMMON_DEFINITIONS} HOMMEXX_ENABLE_GPU_F90) ENDIF() IF (HOMMEXX_BFB_TESTING) SET(COMMON_DEFINITIONS ${COMMON_DEFINITIONS} HOMMEXX_BFB_TESTING) From 9db9236f831ea5f09c3d4b7c2caf400a3ce14488 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Mon, 12 May 2025 10:54:29 -0600 Subject: [PATCH 3/4] Fix enable kokkos --- components/eamxx/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/components/eamxx/CMakeLists.txt b/components/eamxx/CMakeLists.txt index 9bc9366610f5..b40fbffd392c 100644 --- a/components/eamxx/CMakeLists.txt +++ b/components/eamxx/CMakeLists.txt @@ -219,7 +219,8 @@ endif() # and then adding to eamxx_config.h: # #cmakedefine RRTMGP_EXPENSIVE_CHECKS option (SCREAM_RRTMGP_DEBUG "Turn on extra debug checks in RRTMGP" ${SCREAM_DEBUG}) -set(SCREAM_RRTMGP_ENABLE_KOKKOS TRUE) +# This can be removed once rrtmgp is kokkos-only. +add_definitions("-DRRTMGP_ENABLE_KOKKOS") set(SCREAM_DOUBLE_PRECISION TRUE CACHE BOOL "Set to double precision (default True)") From 7cdc48d33c7848db42b65da7081303573f7b4aa9 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Tue, 13 May 2025 13:41:57 -0600 Subject: [PATCH 4/4] Fix CUDA --- components/eamxx/src/physics/rrtmgp/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/components/eamxx/src/physics/rrtmgp/CMakeLists.txt b/components/eamxx/src/physics/rrtmgp/CMakeLists.txt index cf134dd4a2d7..e844940c4b4d 100644 --- a/components/eamxx/src/physics/rrtmgp/CMakeLists.txt +++ b/components/eamxx/src/physics/rrtmgp/CMakeLists.txt @@ -11,6 +11,9 @@ set(EAM_RRTMGP_DIR ${SCREAM_BASE_DIR}/../eam/src/physics/rrtmgp) # NOTE: The external RRTMGP build needs some fixes to work with CUDA in a library build, so for now we will build these ourselves add_library(rrtmgp INTERFACE) target_compile_definitions(rrtmgp INTERFACE EAMXX_HAS_RRTMGP) +if (Kokkos_ENABLE_CUDA) + target_compile_options(rrtmgp INTERFACE $<$:--expt-relaxed-constexpr>) +endif() if (NOT TARGET Kokkos::kokkos) find_package(Kokkos REQUIRED)