Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions .github/workflows/eamxx-sa-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ on:
options:
- gcc-openmp
- gcc-cuda
- gcc-hip
- all
bless:
description: 'Generate baselines'
Expand Down Expand Up @@ -161,3 +162,60 @@ jobs:
submit: ${{ env.submit }}
cmake-configs: Kokkos_ARCH_HOPPER90=${{ env.Hopper }};Kokkos_ARCH_AMPERE80=${{ env.Ampere }};Kokkos_ARCH_VOLTA70=${{ env.Volta }};CMAKE_CUDA_ARCHITECTURES=${{ env.CUDA_ARCH }};SCREAM_SMALL_KERNELS=${{ matrix.test.SK }}
ekat: ${{ env.ekat }}
gcc-hip:
  # Standalone EAMxx testing on the self-hosted HIP runner.
  #
  # Run on every non-manual trigger; on workflow_dispatch, run only when the
  # requested job is 'gcc-hip' or 'all'.
  #
  # The condition is written as a bare expression on purpose: wrapping it in
  # ${{ }} inside a '|' block scalar leaves a trailing newline outside the
  # expression, so the whole value is treated as a non-empty string and the
  # job would always run regardless of the selected job_to_run.
  if: |
    github.event_name != 'workflow_dispatch' ||
    (
      github.event.inputs.job_to_run == 'gcc-hip' ||
      github.event.inputs.job_to_run == 'all'
    )
  runs-on: [self-hosted, ghci-snl-hip, hip, gcc]
  strategy:
    fail-fast: false
    matrix:
      test:
        # Only the opt build is enabled for now; the entries below stay
        # commented out until their failures are understood.
        # - build_type: sp     NEED TO CHECK WHY DEBUG BUILDS FAIL
        #   SK: OFF
        # - build_type: dbg
        #   SK: ON
        - build_type: opt
          SK: OFF
  name: gcc-hip / ${{ matrix.test.build_type }}
  steps:
    - name: Check out the repository
      uses: actions/checkout@v4
      with:
        persist-credentials: false
        show-progress: false
        submodules: recursive
    - name: Show action trigger
      uses: ./.github/actions/show-workflow-trigger
    - name: Ensure CACTS is installed
      run: |
        python3 -m pip install --user --upgrade --trusted-host pypi.org cacts
    - name: Run tests
      # Build the cacts command line incrementally: -g / -s / the EKAT tests
      # option are appended only when the matching env flag is "true".
      run: |
        cmd="cacts -m ghci-snl-hip -t ${{ matrix.test.build_type }} -b auto -r ./components/eamxx"
        if [[ "${{ env.generate }}" == "true" ]]; then
          cmd+=" -g"
        fi
        if [[ "${{ env.submit }}" == "true" ]]; then
          cmd+=" -s"
        fi
        if [[ "${{ env.ekat }}" == "true" ]]; then
          cmd+=" -c EKAT_ENABLE_TESTS=ON"
        fi
        echo "CACTS call: $cmd"
        $cmd
    - name: Upload files
      # always(): keep the logs even when the test step fails.
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: log-files-${{ matrix.test.build_type }}-ghci-snl-hip
        path: |
          components/eamxx/ctest-build/*/Testing/Temporary/Last*.log
          components/eamxx/ctest-build/*/ctest_resource_file.json
          components/eamxx/ctest-build/*/ctest_script.cmake
          components/eamxx/ctest-build/*/CMakeCache.txt
19 changes: 15 additions & 4 deletions components/eamxx/cacts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,13 @@ project:
name: EAMxx
baseline_gen_label: baseline_gen
baseline_summary_file: baseline_list
cmake_vars_names:
enable_baselines:
cmake_settings:
baselines_on:
SCREAM_ENABLE_BASELINE_TESTS: ON
generate_baselines:
baselines_off:
SCREAM_ENABLE_BASELINE_TESTS: OFF
baselines_only:
SCREAM_ONLY_GENERATE_BASELINES: ON
baselines_dir: SCREAM_BASELINES_DIR
cdash:
drop_site: my.cdash.org
drop_location: /submit.php?project=E3SM
Expand Down Expand Up @@ -140,6 +141,14 @@ machines:
gpu_arch: "cuda"
num_run_res: "$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)"

ghci-snl-hip:
  # Machine entry for the ghci-snl-hip runner (HIP GPU architecture).
  baselines_dir: /projects/e3sm/baselines/scream/ghci-snl-hip
  gpu_arch: "hip"
  # Counts the distinct first-column tokens of the rocm-smi product-name lines
  # containing 'GPU[' (presumably one per device -- confirm on the runner).
  num_run_res: $(rocm-smi --showproductname | grep 'GPU\[' | awk '{print $1}' | sort -u | wc -l)
  # Commands listed in order: load the user environment, then set MPICH_CXX to hipcc.
  env_setup: ["source /etc/profile.d/setup-user-env.sh", "export MPICH_CXX=hipcc"]

ghci-oci:
env_setup: ["eval $(${project.root_dir}/../../cime/CIME/Tools/get_case_env -c SMS.ne4pg2_ne4pg2.F2010-SCREAMv1.ghci-oci_gnu)"]

Expand Down Expand Up @@ -219,6 +228,8 @@ configurations:
description: null
uses_baselines: True
on_by_default: True
cmake_args:
SCREAM_BASELINES_DIR: ${machine.baselines_dir}/${build.longname}

dbg:
longname: full_debug
Expand Down
17 changes: 17 additions & 0 deletions components/eamxx/cmake/machine-files/ghci-snl-hip.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Machine file for ghci-snl-hip.
# Common settings for our ghci images
include(${CMAKE_CURRENT_LIST_DIR}/ghci-snl.cmake)

# Set SCREAM_MACHINE
set(SCREAM_MACHINE ghci-snl-hip CACHE STRING "")

# Enable HIP in kokkos (settings come from EKAT's kokkos/hip.cmake machine file)
set (EKAT_MACH_FILES_PATH ${CMAKE_CURRENT_LIST_DIR}/../../../../externals/ekat/cmake/machine-files)
include (${EKAT_MACH_FILES_PATH}/kokkos/hip.cmake)

set(EKAT_MPI_NP_FLAG "-n" CACHE STRING "The mpirun flag for designating the total number of ranks")

# TODO: rebuild the HIP image with GPU-aware MPI, so we can set this to ON
option(SCREAM_MPI_ON_DEVICE "Whether to use device pointers for MPI calls" OFF)

# Cap MPI tests at 2 ranks.
# NOTE(review): the rationale below reads as if copied from the CUDA machine file -- confirm the GPU count/host for the HIP runner.
# Currently, we have 2 GPUs/node on Blake, and we run a SINGLE build per node, so we can fit 2 ranks there
set(SCREAM_TEST_MAX_RANKS 2 CACHE STRING "Upper limit on ranks for mpi tests")
2 changes: 1 addition & 1 deletion components/eamxx/scripts/eamxx-cacts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash

# Ensure cacts package is installed and up to date
pip install --user --upgrade cacts
python3 -m pip install --user --trusted-host pypi.org --upgrade cacts

# Run cacts
cacts "$@"
13 changes: 13 additions & 0 deletions components/eamxx/scripts/machines_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,19 @@ def setup(cls):
cls.gpu_arch = "cuda"
cls.num_run_res = int(run_cmd_no_fail("nvidia-smi --query-gpu=name --format=csv,noheader | wc -l"))

###############################################################################
class GHCISNLHip(Machine):
###############################################################################
    # Machine description registered under the name "ghci-snl-hip".
    concrete = True

    @classmethod
    def setup(cls):
        """Fill in the class-level settings for the ghci-snl-hip machine.

        Calls setup_base with the machine name, then sets the baselines
        directory, GPU architecture, number of run resources, C++ compiler
        and environment setup commands.
        """
        super().setup_base(name="ghci-snl-hip")

        cls.baselines_dir = "/projects/e3sm/baselines/scream/ghci-snl-hip"
        cls.gpu_arch = "hip"

        # Count the distinct first-column tokens on the rocm-smi product-name
        # lines that contain 'GPU[' (presumably one per device -- TODO confirm).
        count_gpus_cmd = r"rocm-smi --showproductname | grep 'GPU\[' | awk '{print $1}' | sort -u | wc -l"
        detected = run_cmd_no_fail(count_gpus_cmd)
        cls.num_run_res = int(detected)

        cls.cxx_compiler = "mpicxx"
        # Single shell line: load the user environment, then set MPICH_CXX to hipcc.
        cls.env_setup = ["source /etc/profile.d/setup-user-env.sh && export MPICH_CXX=hipcc"]

###############################################################################
class GHCIOCI(Machine):
###############################################################################
Expand Down
4 changes: 2 additions & 2 deletions components/eamxx/src/control/atmosphere_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ void AtmosphereDriver::setup_column_conservation_checks ()
const std::string fail_handling_type_str =
driver_options_pl.get<std::string>("column_conservation_checks_fail_handling_type", "warning");

CheckFailHandling fail_handling_type;
CheckFailHandling fail_handling_type = CheckFailHandling::Fatal;
if (fail_handling_type_str == "warning") {
fail_handling_type = CheckFailHandling::Warning;
} else if (fail_handling_type_str == "fatal") {
Expand Down Expand Up @@ -995,7 +995,7 @@ void AtmosphereDriver::create_logger () {
"Invalid string for 'Atm Log File': '" + log_fname + "'.\n");

auto str2lev = [](const std::string& s, const std::string& name) {
LogLevel lev;
LogLevel lev = LogLevel::err;
if (s=="trace") {
lev = LogLevel::trace;
} else if (s=="debug") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace scream
ekat::logger::LogLevel str2LogLevel (const std::string& s) {
using namespace ekat::logger;

ekat::logger::LogLevel log_level;
ekat::logger::LogLevel log_level = LogLevel::err;
if (s=="off") {
log_level = LogLevel::off;
} else if (s=="trace") {
Expand Down
2 changes: 1 addition & 1 deletion components/eamxx/src/share/io/eamxx_scorpio_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1415,7 +1415,7 @@ T get_attribute (const std::string& filename,
err = PIOc_inq_atttype(pf.file->ncid,varid,attname.c_str(),&att_type);
check_scorpio_noerr(err,filename,"attribute",attname,"get_attribute","inq_atttype");

T val;
T val = {};
if (att_type!=nctype(get_dtype<T>())) {

if (att_type==PIO_INT) {
Expand Down
2 changes: 1 addition & 1 deletion components/eamxx/src/share/io/tests/io_basic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ void add (const Field& f, const double v) {
}

int get_dt (const std::string& freq_units) {
int dt;
int dt = -1;
if (freq_units=="nsteps") {
dt = 1;
} else if (freq_units=="nsecs") {
Expand Down
2 changes: 1 addition & 1 deletion components/eamxx/src/share/io/tests/io_filled.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ void set (const Field& f, const double v) {
}

int get_dt (const std::string& freq_units) {
int dt;
int dt = -1;
if (freq_units=="nsteps") {
dt = 1;
} else if (freq_units=="nsecs") {
Expand Down
Loading