From 61efe1d2e600bc66bebfc66aebe6f6f207d5b5a8 Mon Sep 17 00:00:00 2001
From: Michael J Schmidt <mjschm@sandia.gov>
Date: Mon, 25 Aug 2025 15:33:21 -0600
Subject: [PATCH 1/4] ff mam x validation for new compare script

---
 src/validation/mam_x_validation | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/validation/mam_x_validation b/src/validation/mam_x_validation
index a2617c5d3..8aec10cb9 160000
--- a/src/validation/mam_x_validation
+++ b/src/validation/mam_x_validation
@@ -1 +1 @@
-Subproject commit a2617c5d3009b233bc6460406b074cc8a1bd2a19
+Subproject commit 8aec10cb969aa795ff83dfe1fbccd275d36e9f63

From dfa8dd24e7382658210f807b4652ee4cc2c6ffac Mon Sep 17 00:00:00 2001
From: Michael J Schmidt <mjschm@sandia.gov>
Date: Wed, 20 Aug 2025 13:13:08 -0600
Subject: [PATCH 2/4] adds AMD/HIP autotesting via AT2

---
 .github/workflows/README.md               |   2 +-
 .github/workflows/at2_gcc-hip.yml         | 122 ++++++++++++++++++++++
 .github/workflows/m4x_autotester_main.yml |  13 +++
 3 files changed, 136 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/at2_gcc-hip.yml

diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index 576d80d15..1a267198d 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -12,7 +12,7 @@ To do this, testing is initialized via the top-level workflow, `MAM4xx Autoteste
 
 #### GPU-based Testing
 
-| Test Name                         | GPU Brand | GPU Type | Micoarchitecture | Compute Capability | Machine | Compilers                    |
+| Test Name                         | GPU Brand | GPU Type | Microarchitecture | Compute Capability | Machine | Compilers                    |
 | --------------------------------- | --------- | -------- | ---------------- | ------------------ | ------- | ---------------------------- |
 | GPU AT2 gcc 12.3 cuda 12.1        | NVIDIA    | H100     | Hopper           | 9.0                | blake   | `gcc` 12.3.0/`nvcc` 12.1.105 |
 
diff --git a/.github/workflows/at2_gcc-hip.yml b/.github/workflows/at2_gcc-hip.yml
new file mode 100644
index 000000000..8cbd94ec3
--- /dev/null
+++ b/.github/workflows/at2_gcc-hip.yml
@@ -0,0 +1,122 @@
+name: "GPU AT2 gcc 13.3 hip 6.2"
+
+on:
+  workflow_call:
+    inputs:
+      precision:
+        required: false
+        type: string
+      build_type:
+        required: false
+        type: string
+
+jobs:
+  # this is more work than I'd expect, but this is how you pass info after
+  # operating on it in a job/step
+  # TODO: factor this out into an action?
+  # parse the inputs from the workflow call that'll be used by strategy.matrix
+  define_matrix:
+    runs-on: ubuntu-22.04
+    # define the outputs that will come from the steps below
+    outputs:
+      build_type: ${{ steps.build_type.outputs.build_type }}
+      precision: ${{ steps.precision.outputs.precision }}
+    steps:
+      - name: Define build_type
+        id: build_type
+        env:
+          # empty input (i.e., triggered by PR/schedule) falls back to ALL
+          btype: ${{ inputs.build_type || 'ALL' }}
+        # read the value from the shell variable (never inlined into the script); '*' is only a safety net
+        run: |
+          case "${btype}" in
+            "Debug")
+              echo 'build_type=["Debug"]' >> "$GITHUB_OUTPUT" ;;
+            "Release")
+              echo 'build_type=["Release"]' >> "$GITHUB_OUTPUT" ;;
+            "ALL")
+              echo 'build_type=["Debug", "Release"]' >> "$GITHUB_OUTPUT" ;;
+            *)
+              echo 'build_type=["Debug", "Release"]' >> "$GITHUB_OUTPUT" ;;
+          esac
+      - name: Define precision
+        id: precision
+        env:
+          prec: ${{ inputs.precision || 'ALL' }}
+        run: |
+          case ${{ env.prec }} in
+            "Debug")
+              echo 'precision=["single"]' >> "$GITHUB_OUTPUT" ;;
+            "Release")
+              echo 'precision=["double"]' >> "$GITHUB_OUTPUT" ;;
+            "ALL")
+              echo 'precision=["single", "double"]' >> "$GITHUB_OUTPUT" ;;
+            *)
+              echo 'precision=["single", "double"]' >> "$GITHUB_OUTPUT" ;;
+          esac
+  gcc-cuda:
+    runs-on:  [self-hosted, m4xci-snl-hip, hip, gcc]
+    # will run other tests in the matrix even if one fails
+    # NOTE: prioritizes extra info over speed, so consider whether this makes sense
+    continue-on-error: false
+    needs: define_matrix
+    # A build matrix storing all desired configurations.
+    strategy:
+      fail-fast: true
+      matrix:
+        # to get the array instead of a string, need the fromJSON()
+        build-type: ${{ fromJSON(needs.define_matrix.outputs.build_type) }}
+        fp-precision: ${{ fromJSON(needs.define_matrix.outputs.precision) }}
+    name: gcc-hip / ${{ matrix.build-type }} - ${{ matrix.fp-precision }}
+    steps:
+      - name: Check out the repository
+        uses: actions/checkout@v4
+        with:
+          persist-credentials: false
+          show-progress: false
+          submodules: recursive
+      - name: Cloning Haero
+        uses: actions/checkout@v4
+        with:
+          repository: eagles-project/haero
+          submodules: recursive
+          path: haero_src
+      - name: Show action trigger
+        uses: ./.github/actions/show-workflow-trigger
+      - name: Building Haero (${{ matrix.build-type }}, ${{ matrix.fp-precision }} precision)
+        run: |
+          cmake -S haero_src -B haero_build \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
+            -DCMAKE_INSTALL_PREFIX="haero_install" \
+            -DCMAKE_C_COMPILER=gcc \
+            -DCMAKE_CXX_COMPILER=hipcc \
+            -DHAERO_SKIP_FIND_YAML_CPP=ON \
+            -DHAERO_ENABLE_MPI=OFF \
+            -DHAERO_ENABLE_GPU=ON \
+            -DHAERO_PRECISION=${{ matrix.fp-precision }} \
+            -DKokkos_ARCH_AMD_GFX90A=ON \
+            -DHAERO_DEVICE_ARCH=AMD_GFX90A
+          cd haero_build
+          make -j"$(nproc)"
+          make install
+      - name: Configuring MAM4xx (${{ matrix.build-type }}, ${{ matrix.fp-precision }} precision)
+        run: |
+          cmake -S . -B build \
+            -DCMAKE_CXX_COMPILER=hipcc \
+            -DCMAKE_C_COMPILER=gcc \
+            -DCMAKE_INSTALL_PREFIX=$(pwd)/install \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
+            -DMAM4XX_HAERO_DIR=$(pwd)/haero_install \
+            -DNUM_VERTICAL_LEVELS=72 \
+            -DENABLE_COVERAGE=OFF \
+            -DENABLE_SKYWALKER=ON \
+            -DCMAKE_HIP_ARCHITECTURES=gfx90a \
+            -G "Unix Makefiles"
+      - name: Building MAM4xx (${{ matrix.build-type }}, ${{ matrix.fp-precision }} precision)
+        run: |
+          cd build
+          make -j"$(nproc)"
+      - name: Running tests (${{ matrix.build-type }}, ${{ matrix.fp-precision }} precision)
+        run: |
+          cd build
+          ctest -V --output-on-failure
diff --git a/.github/workflows/m4x_autotester_main.yml b/.github/workflows/m4x_autotester_main.yml
index b48fba26d..5f6afb937 100644
--- a/.github/workflows/m4x_autotester_main.yml
+++ b/.github/workflows/m4x_autotester_main.yml
@@ -31,6 +31,7 @@ on:
         default: 'GPU-NVIDIA_H100'
         options:
           - GPU-NVIDIA_H100
+          - GPU-AMD_MI200-series
           - CPU-Ubuntu_22-04
           - ALL
       precision:
@@ -71,6 +72,10 @@ jobs:
     if: ${{ github.event.pull_request || github.event.schedule }}
     uses:
       ./.github/workflows/at2_gcc-cuda.yml
+  gcc-hip:
+    if: ${{ github.event.pull_request || github.event.schedule }}
+    uses:
+      ./.github/workflows/at2_gcc-hip.yml
   gcc-cpu_gh:
     if: ${{ github.event.pull_request || github.event.schedule }}
     secrets:
@@ -89,6 +94,14 @@ jobs:
       build_type: ${{ github.event.inputs.build_type }}
     uses:
       "./.github/workflows/at2_gcc-cuda.yml"
+  manual-gpu_hip:
+    if: ${{ contains(github.event.inputs.architecture, 'GPU-AMD_MI200-series') ||
+            contains(github.event.inputs.architecture, 'ALL') }}
+    with:
+      precision: ${{ github.event.inputs.precision }}
+      build_type: ${{ github.event.inputs.build_type }}
+    uses:
+      "./.github/workflows/at2_gcc-hip.yml"
   manual-cpu_gh:
     if: ${{ contains(github.event.inputs.architecture, 'CPU-Ubuntu_22-04') ||
             contains(github.event.inputs.architecture, 'ALL') }}

From 57c5c05c7e553a8a2d710c6ecb0b293ddbb948ee Mon Sep 17 00:00:00 2001
From: Michael J Schmidt <mjschm@sandia.gov>
Date: Fri, 22 Aug 2025 13:12:07 -0600
Subject: [PATCH 3/4] correct precision choosing bug for manual trigger

---
 .github/workflows/at2_gcc-cuda.yml | 7 +++----
 .github/workflows/at2_gcc-hip.yml  | 7 +++----
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/at2_gcc-cuda.yml b/.github/workflows/at2_gcc-cuda.yml
index cc0f2a347..3d4cb98d0 100644
--- a/.github/workflows/at2_gcc-cuda.yml
+++ b/.github/workflows/at2_gcc-cuda.yml
@@ -45,9 +45,9 @@ jobs:
           prec: ${{ inputs.precision || 'ALL' }}
         run: |
           case ${{ env.prec }} in
-            "Debug")
+            "single")
               echo 'precision=["single"]' >> "$GITHUB_OUTPUT" ;;
-            "Release")
+            "double")
               echo 'precision=["double"]' >> "$GITHUB_OUTPUT" ;;
             "ALL")
               echo 'precision=["single", "double"]' >> "$GITHUB_OUTPUT" ;;
@@ -56,8 +56,7 @@ jobs:
           esac
   gcc-cuda:
     runs-on:  [self-hosted, m4xci-snl-cuda, cuda, gcc]
-    # will run other tests in the matrix even if one fails
-    # NOTE: prioritizes extra info over speed, so consider whether this makes sense
+    # will not run other tests in the matrix if one fails
     continue-on-error: false
     needs: define_matrix
     # A build matrix storing all desired configurations.
diff --git a/.github/workflows/at2_gcc-hip.yml b/.github/workflows/at2_gcc-hip.yml
index 8cbd94ec3..c9e669911 100644
--- a/.github/workflows/at2_gcc-hip.yml
+++ b/.github/workflows/at2_gcc-hip.yml
@@ -45,9 +45,9 @@ jobs:
           prec: ${{ inputs.precision || 'ALL' }}
         run: |
           case ${{ env.prec }} in
-            "Debug")
+            "single")
               echo 'precision=["single"]' >> "$GITHUB_OUTPUT" ;;
-            "Release")
+            "double")
               echo 'precision=["double"]' >> "$GITHUB_OUTPUT" ;;
             "ALL")
               echo 'precision=["single", "double"]' >> "$GITHUB_OUTPUT" ;;
@@ -56,8 +56,7 @@ jobs:
           esac
   gcc-cuda:
     runs-on:  [self-hosted, m4xci-snl-hip, hip, gcc]
-    # will run other tests in the matrix even if one fails
-    # NOTE: prioritizes extra info over speed, so consider whether this makes sense
+    # will not run other tests in the matrix if one fails
     continue-on-error: false
     needs: define_matrix
     # A build matrix storing all desired configurations.

From 24f926c3c1d686376412c423c7931533c16333c6 Mon Sep 17 00:00:00 2001
From: Michael J Schmidt <mjschm@sandia.gov>
Date: Fri, 22 Aug 2025 15:57:04 -0600
Subject: [PATCH 4/4] update READMEs

---
 .github/workflows/AT2-README.md | 27 +++++++++++++++++----------
 .github/workflows/README.md     | 25 +++++++++++++++++--------
 2 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/AT2-README.md b/.github/workflows/AT2-README.md
index f69a13a4e..78ce3fc74 100644
--- a/.github/workflows/AT2-README.md
+++ b/.github/workflows/AT2-README.md
@@ -1,5 +1,6 @@
 # Autotester2 (AT2) Workflow for MAM4xx
- This document contains a brief description of how AT2 is used to automate testing on SNL hardware.
+
+This document contains a brief description of how AT2 is used to automate testing on SNL hardware.
 Additionally, any helpful notes and TODOs may be kept here to assist developers.
 
 ## Overview
@@ -10,9 +11,10 @@ This is done for security/policy reasons and ensures that only those with approv
 
 ### Test Hardware and Compiler Configurations
 
-| Test Name            | GPU Brand | GPU Type | Micoarchitecture | Compute Capability | Machine | Compilers                    |
-| -------------------- | --------- | -------- | ---------------- | ------------------ | ------- | ---------------------------- |
-| gcc_12-3-0_cuda_12-1 | NVIDIA    | H100     | Hopper           | 9.0                | blake   | `gcc` 12.3.0/`nvcc` 12.1.105 |
+| Test Name                         | GPU Brand | GPU Type    | Microarchitecture | Compute Capability | Machine | OS     | Compilers                        |
+| --------------------------------- | --------- | ----------- | ----------------- | ------------------ | ------- | ------ | -------------------------------- |
+| GPU AT2 gcc 12.3 cuda 12.1        | NVIDIA    | H100        | Hopper            | 9.0                | blake   | RHEL8  | `gcc` 12.3.0/`nvcc` 12.1.105     |
+| GPU AT2 gcc 13.3 hip 6.2          | AMD       | MI250/MI210 | CDNA 2 (gfx90a)   | N/A                | caraway | RHEL9  | `gcc` 13.3.0/`hipcc` 6.2.41133-0 |
 
 ### The Flow of the CI Workflow
 
@@ -24,7 +26,8 @@ As of now, the image is of a UBI 8 system, with Spack-installed compilers and al
 
 #### Triggering the Testing Workflow
 
-This autotesting workflow is triggered by opening a pull request to `main` and also by a handful of actions on such a PR that is already open, including:
+This autotesting workflow is triggered by opening a pull request to `main` and
+also by a handful of actions on such a PR that is already open, including:
 
 - `reopened`
 - `ready_for_review`
@@ -40,8 +43,8 @@ or
 
 > **Actions** -> `<Previously-run SNL-AT2 Workflow/Job>` -> **Re-run `[all,this]` job(s)**.
 
-The AT2 configuration on `blake` currently attempts to keep 3 runners available
-to accept jobs at all times.
+The AT2 configuration on `blake` and `caraway` currently attempts to keep 3
+runners per machine available to accept jobs at all times.
 This workflow is configured to allow concurrent testing, so up to 3 test-matrix
 configurations can run at once.
 The concurrency setting is also configured to kill any active job if another
@@ -58,13 +61,17 @@ instance of this workflow is started for the same PR ref.
 
 ## Development Details
 
-Most of the required configuration is provided by the AT2 docs and instructional Confluence page (on the Sandia network :confused:--reach out if you need access).
+Most of the required configuration is provided by the AT2 docs and instructional
+Confluence page (on the Sandia network :confused:--reach out if you need access).
 However, some non-obvious choices and configurations are listed here.
 
-- To add some info to the testing output, we employ a custom action, cribbed from E3SM/EAMxx, that prints out the workflow's trigger.
+- To add some info to the testing output, we employ a custom action, cribbed
+  from E3SM/EAMxx, that prints out the workflow's trigger.
 
 ### Hacks
 
+- [ ] FIXME(@mjs): The hack described below should no longer be necessary after the changes to the Haero build; `build-haero.sh` should now work for this build.
+
 - For whatever reason, Skywalker does not like building in the `gcc_12-3-0_cuda_12-1` container for the H100 GPU.
   - This appears to be an issue of the (Haero?) build not auto-detecting the correct Compute Capability (CC 9.0 => `sm_90`).
   - To overcome this, we first obtain the CC flag via `nvidia-smi` within the testing container.
@@ -77,4 +84,4 @@ However, some non-obvious choices and configurations are listed here.
   - One token used to fetch and read/write runner information.
   - **Expires 11 April 2026**
   - One token used fetch and read repository information via the API.
-  - **Expires 2 May 2025**
+  - **Expires 6 May 2026**
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index 1a267198d..955a0cdff 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -12,17 +12,18 @@ To do this, testing is initialized via the top-level workflow, `MAM4xx Autoteste
 
 #### GPU-based Testing
 
-| Test Name                         | GPU Brand | GPU Type | Microarchitecture | Compute Capability | Machine | Compilers                    |
-| --------------------------------- | --------- | -------- | ---------------- | ------------------ | ------- | ---------------------------- |
-| GPU AT2 gcc 12.3 cuda 12.1        | NVIDIA    | H100     | Hopper           | 9.0                | blake   | `gcc` 12.3.0/`nvcc` 12.1.105 |
+| Test Name                         | GPU Brand | GPU Type    | Microarchitecture | Compute Capability | Machine | OS     | Compilers                        |
+| --------------------------------- | --------- | ----------- | ----------------- | ------------------ | ------- | ------ | -------------------------------- |
+| GPU AT2 gcc 12.3 cuda 12.1        | NVIDIA    | H100        | Hopper            | 9.0                | blake   | RHEL8  | `gcc` 12.3.0/`nvcc` 12.1.105     |
+| GPU AT2 gcc 13.3 hip 6.2          | AMD       | MI250/MI210 | CDNA 2 (gfx90a)   | N/A                | caraway | RHEL9  | `gcc` 13.3.0/`hipcc` 6.2.41133-0 |
 
 #### CPU-based Testing
 
-**Note:** These are the current specs for GitHub's Ubuntu 22.04 runner and are subject to change.
+**Note:** These are the *current* specs for GitHub's Ubuntu 22.04 runner and are subject to change.
 
-| Test Name                                    | OS                   | Machine        | Compiler   |
-| -------------------------------------------- | -------------------- | -------------- | ---------- |
-| GitHub CPU Auto-test Ubuntu 22.04[^gh-ubu2204] | Linux - Ubuntu 22.04 | GitHub Runners | `gcc` 12.3 |
+| Test Name                               | OS                   | Machine        | Compiler   |
+| --------------------------------------- | -------------------- | -------------- | ---------- |
+| CPU GH-runner Ubuntu 22.04[^gh-ubu2204] | Linux - Ubuntu 22.04 | GitHub Runners | `gcc` 12.3 |
 
 ### The Flow of the CI Workflow
 
@@ -48,6 +49,13 @@ Based on the trigger and/or inputs, `MAM4xx Autotester` dispatches sub-workflows
 - ***Note:*** AT2 = "Autotester 2," the second generation of a Sandia-developed GitHub-based testing product.
 - See the [AT2 README](./AT2-README.md) for details about the implementation of the AT2 product.
 
+#### GPU AT2 `gcc` 13.3 `hip` 6.2
+
+- This is largely identical to the above CUDA-based workflow, the salient difference being that we run on AMD hardware, using the `hipcc` C++ compiler.
+- The `caraway` machine has 2 different AMD_GFX90A-architecture MI200-series GPUs available, MI210 and MI250.
+- As of the time of writing, autotesting jobs are assigned one or the other based on availability, to speed up matters.
+  - ***Note:*** This could change based on future needs.
+
 #### GitHub CPU Auto-test Ubuntu 22.04
 
 - The full version of this test runs a "matrix-strategy" test running all combinations of
@@ -86,6 +94,7 @@ The current options when manually triggering a workflow are:
 - Test Machine Architecture
   - Current Options:
     - `GPU-NVIDIA_H100`
+    - `GPU-AMD_MI200-series`
     - `CPU-Ubuntu_22-04`
     - `ALL`
 - Floating-point Precision
@@ -135,7 +144,7 @@ Refer to the section on [Other Types of Job Control](./AT2-README.md#other-types
 - [x] Unify all CI into a single top-level yaml file that calls the sub-cases.
   - This should provide finer control over what runs and when.
   - @mjschmidt271
-- [ ] Add testing for AMD GPUs on `caraway`.
+- [x] Add testing for AMD GPUs on `caraway`.
   - @jaelynlitz - WIP
 
 ### Low-priority