Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/test-all-scream/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ runs:
if: always()
uses: actions/upload-artifact@v4
with:
name: log-files-${{ inputs.build_type }}
name: log-files-${{ inputs.build_type }}-${{ inputs.machine }}
path: |
components/eamxx/ctest-build/*/Testing/Temporary/Last*.log
components/eamxx/ctest-build/*/ctest_resource_file.json
Expand Down
13 changes: 4 additions & 9 deletions .github/workflows/eamxx-scripts-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ on:
paths:
- components/eamxx/scripts/**
- components/eamxx/cime_config/*.py
pull_request_review:
types: [submitted]

# Manual run for debug purposes only
workflow_dispatch:
Expand All @@ -19,13 +17,10 @@ on:
- cron: '0 7 * * *' # Runs at 7 AM UTC, which is midnight MT during Standard Time

concurrency:
# Two runs are in the same group if:
# - they have the same trigger
# - if trigger=pull_request/pull_request_review, the PR number must match
# - if trigger=workflow_dispatch/schedule: no concurrency
group: ${{ github.workflow }}-${{ github.event_name }}-${{
(github.event_name == 'pull_request' || github.event_name == 'pull_request_review') && github.event.pull_request.number || github.run_id
}}
# Two runs are in the same group if they are testing the same git ref
# - if trigger=pull_request, the ref is refs/pull/<PR_NUMBER>/merge
# - for other triggers, the ref is the branch tested
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
Expand Down
84 changes: 48 additions & 36 deletions .github/workflows/eamxx-standalone-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ on:
- components/eam/src/physics/p3/scream/**
- components/eam/src/physics/cam/**
- .github/workflows/eamxx-standalone-testing.yml
pull_request_review:
types: [submitted]

# Manual run is used to bless
workflow_dispatch:
Expand All @@ -23,6 +21,7 @@ on:
type: choice
options:
- gcc-openmp
- gcc-cuda
bless:
description: 'Generate baselines'
required: true
Expand All @@ -33,13 +32,10 @@ on:
- cron: '0 7 * * *' # Runs at 7 AM UTC, which is midnight MT during Standard Time

concurrency:
# Two runs are in the same group if:
# - they have the same trigger
# - if trigger=pull_request/pull_request_review, the PR number must match
# - if trigger=workflow_dispatch/schedule: no concurrency
group: ${{ github.workflow }}-${{ github.event_name }}-${{
(github.event_name == 'pull_request' || github.event_name == 'pull_request_review') && github.event.pull_request.number || github.run_id
}}
# Two runs are in the same group if they are testing the same git ref
# - if trigger=pull_request, the ref is refs/pull/<PR_NUMBER>/merge
# - for other triggers, the ref is the branch tested
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
Expand Down Expand Up @@ -86,30 +82,46 @@ jobs:
generate: ${{ env.generate }}
submit: ${{ env.submit }}
cmake-configs: Kokkos_ENABLE_OPENMP=ON
# cuda:
# # Disable until the CUDA container is up and running. When the CUDA container is available, remove
# # this line and uncomment the next if
# if: false
# # Runs always for pull_request. For workflow_dispatch, user must request this machine
# # if: ${{ github.event_name == 'pull_request' || contains(github.event.inputs.jobs_to_run, 'openmp-gcc') }}
# runs-on: [self-hosted, cuda]
# strategy:
# fail-fast: false
# matrix:
# build_type: [sp, dbg, fpe, opt]
# name: cuda-${{ matrix.build_type }}
# steps:
# - name: Show action trigger
# uses: ./.github/actions/print-workflow-trigger
# - name: Check out the repository
# uses: actions/checkout@v4
# with:
# persist-credentials: false
# show-progress: false
# submodules: recursive
# - name: Run tests
# uses: ./.github/actions/test-all-scream
# with:
# build_type: ${{ matrix.build_type }}
# machine: ghci-snl-cuda
# run_type: at-run
gcc-cuda:
  # EAMxx standalone tests on the self-hosted CUDA runner, one job per build type.
  runs-on: [self-hosted, ghci-snl-cuda, cuda, gcc]
  strategy:
    # Let every build type finish even if one of them fails
    fail-fast: false
    matrix:
      build_type: [sp, dbg, opt]
  # Skip this job only when a manual (workflow_dispatch) run asked for a different
  # entry of jobs_list; every other trigger runs it.
  if: ${{ !(github.event_name == 'workflow_dispatch' && github.event.inputs.jobs_list != 'gcc-cuda') }}
  name: gcc-cuda / ${{ matrix.build_type }}
  steps:
    # The checkout must come first: the local actions (./.github/actions/*) used
    # below live in the repository itself.
    - name: Check out the repository
      uses: actions/checkout@v4
      with:
        persist-credentials: false
        show-progress: false
        submodules: recursive
    - name: Show action trigger
      uses: ./.github/actions/show-workflow-trigger
    - name: Check for skip labels
      # Labels (and github.event.pull_request.number) only exist for PR-triggered
      # runs. This workflow is not triggered by pull_request_review, so only
      # pull_request needs to be checked here.
      if: ${{ github.event_name == 'pull_request' }}
      uses: ./.github/actions/check-skip-labels
      with:
        skip_labels: 'AT: skip gcc,AT: skip cuda,AT: skip eamxx-sa,AT: skip eamxx-all'
        token: ${{ secrets.GITHUB_TOKEN }}
        pr_number: ${{ github.event.pull_request.number }}
    - name: Set test-all inputs based on event specs
      # Defaults: neither submit results nor generate baselines.
      #  - schedule:          submit=true
      #  - workflow_dispatch: generate=true, but only if the 'bless' input is true
      run: |
        echo "submit=false" >> $GITHUB_ENV
        echo "generate=false" >> $GITHUB_ENV
        if [ "${{ github.event_name }}" == "schedule" ]; then
          echo "submit=true" >> $GITHUB_ENV
        elif [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
          if [ "${{ inputs.bless }}" == "true" ]; then
            echo "generate=true" >> $GITHUB_ENV
          fi
        fi
    - name: Run tests
      uses: ./.github/actions/test-all-scream
      with:
        build_type: ${{ matrix.build_type }}
        machine: ghci-snl-cuda
        generate: ${{ env.generate }}
        submit: ${{ env.submit }}
        # Target the Volta (compute capability 7.0) architecture, for both Kokkos and CMake
        cmake-configs: Kokkos_ARCH_VOLTA70=ON;CMAKE_CUDA_ARCHITECTURES=70
17 changes: 4 additions & 13 deletions .github/workflows/eamxx-v1-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ on:
- components/eam/src/physics/p3/scream/**
- components/eam/src/physics/cam/**
- .github/workflows/eamxx-v1-testing.yml
pull_request_review:
types: [submitted]

# Manual run is used to bless
workflow_dispatch:
Expand All @@ -29,13 +27,10 @@ on:
type: boolean

concurrency:
# Two runs are in the same group if:
# - they have the same trigger
# - if trigger=pull_request/pull_request_review: the PR number must match
# - if trigger=workflow_dispatch: no concurrency
group: ${{ github.workflow }}-${{ github.event_name }}-${{
(github.event_name == 'pull_request' || github.event_name == 'pull_request_review') && github.event.pull_request.number || github.run_id
}}
# Two runs are in the same group if they are testing the same git ref
# - if trigger=pull_request, the ref is refs/pull/<PR_NUMBER>/merge
# - for other triggers, the ref is the branch tested
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
Expand All @@ -54,10 +49,6 @@ jobs:
short_name: SMS_D_Ln5.ne4pg2_oQU480.F2010-SCREAMv1-MPASSI.scream-mam4xx-all_mam4xx_procs
fail-fast: false
name: cpu-gcc / ${{ matrix.test.short_name }}
# Run this workflow if:
# - workflow_dispatch: user requested this job.
# - schedule: always:
# - pull_request/pull_request_review: matching skip label is NOT found
if: ${{ !(github.event_name == 'workflow_dispatch' && github.event.inputs.jobs_list != 'cpu-gcc') }}
steps:
- name: Check out the repository
Expand Down
14 changes: 14 additions & 0 deletions components/eamxx/cmake/machine-files/ghci-snl-cuda.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Machine file for the ghci-snl-cuda machine (see SCREAM_MACHINE below).
# It layers CUDA-specific settings on top of the shared ghci-snl configuration.

# Common settings for our ghci images
include(${CMAKE_CURRENT_LIST_DIR}/ghci-snl.cmake)

# Set SCREAM_MACHINE
set(SCREAM_MACHINE ghci-snl-cuda CACHE STRING "")

# Enable CUDA in kokkos
# The kokkos settings are taken from EKAT's machine files, which are located
# relative to this file (four directories up, then externals/ekat/cmake/machine-files).
set (EKAT_MACH_FILES_PATH ${CMAKE_CURRENT_LIST_DIR}/../../../../externals/ekat/cmake/machine-files)
include (${EKAT_MACH_FILES_PATH}/kokkos/cuda.cmake)

# Flag passed to the MPI launcher to select the total number of ranks
set(EKAT_MPI_NP_FLAG "-n" CACHE STRING "The mpirun flag for designating the total number of ranks")

# TODO: rebuild cuda image with cuda-aware MPI, so we can set this to ON
# NOTE: option() only provides the default (OFF); a value already set by the
# user (e.g. via -D on the command line) is left untouched.
option(SCREAM_MPI_ON_DEVICE "Whether to use device pointers for MPI calls" OFF)
10 changes: 10 additions & 0 deletions components/eamxx/scripts/machines_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,16 @@ def setup(cls):
super().setup_base("ghci-snl-cpu")
cls.baselines_dir = "/projects/e3sm/baselines/scream/ghci-snl-cpu"

###############################################################################
class GHCISNLCuda(Machine):
###############################################################################
    # Machine specs for the "ghci-snl-cuda" machine.
    concrete = True

    @classmethod
    def setup(cls):
        # Common initialization first, then the machine-specific attributes.
        # NOTE(review): num_bld_res/num_run_res are presumably the number of
        # build/run resources; confirm against Machine.setup_base.
        super().setup_base(
            name="ghci-snl-cuda",
            num_bld_res=16,
            num_run_res=1,
        )

        # Location of the baselines for this machine
        cls.baselines_dir = "/projects/e3sm/baselines/scream/ghci-snl-cuda"

        # GPU architecture of this machine
        cls.gpu_arch = "cuda"

###############################################################################
class Lassen(Machine):
###############################################################################
Expand Down
7 changes: 6 additions & 1 deletion components/eamxx/src/diagnostics/tests/wind_speed_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ TEST_CASE("wind_speed")
register_diagnostics();

constexpr int ntests = 5;
#ifdef NDEBUG
constexpr int ulp_tol = 1;
#else
constexpr int ulp_tol = 0;
#endif
for (int itest=0; itest<ntests; ++itest) {
// Randomize wind
randomize(uv,engine,pdf);
Expand All @@ -87,7 +92,7 @@ TEST_CASE("wind_speed")
for (int ilev=0; ilev<nlevs; ++ilev) {
const auto u = uv_h (icol,0,ilev);
const auto v = uv_h (icol,1,ilev);
REQUIRE (ws_h(icol,ilev) == std::sqrt(u*u+v*v));
REQUIRE_THAT (ws_h(icol,ilev), Catch::Matchers::WithinULP(std::sqrt(u*u+v*v),ulp_tol));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ int main (int argc, char** argv) {
auto logger = std::make_shared<logger_t>("",LogLevel::info,comm);

// Get filenames from command line
if (argc != 3) {
if (argc < 3) {
std::string msg = "Missing required inputs. Usage:\n";
msg += argv[0];
msg += " inputfile baseline\n";
logger->error(msg);
return 1;
}
std::string inputfile(argv[argc-2]);
std::string baseline(argv[argc-1]);
std::string inputfile(argv[1]);
std::string baseline(argv[2]);

// Initialize yakl
yakl::init();
Expand Down