From 03732bc0d37074af4322aba8b185cfea82fef600 Mon Sep 17 00:00:00 2001 From: bertiethorpe <84867280+bertiethorpe@users.noreply.github.com> Date: Mon, 16 Sep 2024 13:50:10 +0100 Subject: [PATCH 01/47] Update openstack.pkr.hcl --- packer/openstack.pkr.hcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index 5f66c0320..0ec27f8e0 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -186,7 +186,7 @@ source "openstack" "openhpc" { # Output image: image_disk_format = "qcow2" image_visibility = var.image_visibility - image_name = "${source.name}-${var.os_version}-${local.timestamp}-${substr(local.git_commit, 0, 8)}" + image_name = "${source.name}-${var.os_version}-latest" } build { From 06ee0f2b66ec29ed9d2027f27de8805d2f4a612c Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Mon, 16 Sep 2024 14:20:33 +0000 Subject: [PATCH 02/47] new image build workflow --- .github/workflows/fatimage.yml | 5 +- .github/workflows/imagebuild.yml | 126 +++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/imagebuild.yml diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 59eb1b78e..d7810a571 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -1,7 +1,10 @@ -name: Build fat image +name: Build nightly fat image on: workflow_dispatch: + schedule: + - cron: '0 0 * * *' # Run at midnight + jobs: openstack: name: openstack-imagebuild diff --git a/.github/workflows/imagebuild.yml b/.github/workflows/imagebuild.yml new file mode 100644 index 000000000..7502c2201 --- /dev/null +++ b/.github/workflows/imagebuild.yml @@ -0,0 +1,126 @@ + +name: Build new image +on: + workflow_dispatch: +jobs: + openstack: + name: openstack-imagebuild + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build + cancel-in-progress: true + runs-on: ubuntu-22.04 + strategy: + fail-fast: false # allow other matrix jobs to continue even if one fails + matrix: # build RL8+OFED, RL9+OFED, RL9+OFED+CUDA versions + os_version: + - RL8 + - RL9 + build: + - openstack.openhpc-ofed + - openstack.openhpc-cuda + exclude: + - os_version: RL8 + build: openstack.openhpc-cuda + env: + ANSIBLE_FORCE_COLOR: True + OS_CLOUD: openstack + CI_CLOUD: ${{ vars.CI_CLOUD }} + steps: + - uses: actions/checkout@v2 + + - name: Setup ssh + run: | + set -x + mkdir ~/.ssh + echo "${{ secrets[format('{0}_SSH_KEY', vars.CI_CLOUD)] }}" > ~/.ssh/id_rsa + chmod 0600 ~/.ssh/id_rsa + shell: bash + + - name: Add bastion's ssh key to known_hosts + run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts + shell: bash + + - name: Install ansible etc + run: dev/setup-env.sh + + - name: Write clouds.yaml + run: | + mkdir -p ~/.config/openstack/ + echo "${{ secrets[format('{0}_CLOUDS_YAML', vars.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml + shell: bash + + - name: Setup environment + run: | + . venv/bin/activate + . environments/.stackhpc/activate + + - name: Build fat image with packer + id: packer_build + run: | + . venv/bin/activate + . environments/.stackhpc/activate + cd packer/ + packer init . + PACKER_LOG=1 packer build -on-error=${{ vars.PACKER_ON_ERROR }} -only=${{ matrix.build }} -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl openstack.pkr.hcl + env: + PKR_VAR_os_version: ${{ matrix.os_version }} + + - name: Get created image names from manifest + id: manifest + run: | + . venv/bin/activate + IMAGE_ID=$(jq --raw-output '.builds[-1].artifact_id' packer/packer-manifest.json) + while ! openstack image show -f value -c name $IMAGE_ID; do + sleep 5 + done + IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID) + echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" + echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" + + - name: Download image + run: | + . venv/bin/activate + sudo mkdir /mnt/images + sudo chmod 777 /mnt/images + openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: install libguestfs + run: | + sudo apt -y update + sudo apt -y install libguestfs-tools + + - name: mkdir for mount + run: sudo mkdir -p './${{ steps.manifest.outputs.image-name }}' + + - name: mount qcow2 file + run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}' + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@0.17.0 + with: + scan-type: fs + scan-ref: "${{ steps.manifest.outputs.image-name }}" + scanners: "vuln" + format: sarif + output: "${{ steps.manifest.outputs.image-name }}.sarif" + # turn off secret scanning to speed things up + + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: "${{ steps.manifest.outputs.image-name }}.sarif" + category: "${{ matrix.os_version }}-${{ matrix.build }}" + + - name: Fail if scan has CRITICAL vulnerabilities + uses: aquasecurity/trivy-action@0.16.1 + with: + scan-type: fs + scan-ref: "${{ steps.manifest.outputs.image-name }}" + scanners: "vuln" + format: table + exit-code: '1' + severity: 'CRITICAL' + ignore-unfixed: true From d6d72ba32026b27c3b0324aac7c866aab2f0bff9 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 17 Sep 2024 09:05:24 +0000 Subject: [PATCH 03/47] dynamically set packer vars from fatimage workflow --- .github/workflows/fatimage.yml | 18 ++++- .github/workflows/imagebuild.yml | 126 ------------------------------- packer/openstack.pkr.hcl | 2 +- 3 files changed, 18 insertions(+), 128 deletions(-) delete mode 100644 .github/workflows/imagebuild.yml diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index d7810a571..e612d40f3 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -64,7 +64,23 @@ jobs: . environments/.stackhpc/activate cd packer/ packer init . - PACKER_LOG=1 packer build -on-error=${{ vars.PACKER_ON_ERROR }} -only=${{ matrix.build }} -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl openstack.pkr.hcl + + SOURCE_IMAGE_NAME="${{ matrix.build }}-${{ matrix.os_version }}-latest" + IMAGE_NAME="${{ matrix.build }}-${{ matrix.os_version }}-latest" + + PACKER_CMD="PACKER_LOG=1 packer build \ + -on-error=${{ vars.PACKER_ON_ERROR }} \ + -only=${{ matrix.build }} \ + -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl \ + openstack.pkr.hcl" + + if [ "${{ github.event_name }}" != "schedule" ]; then + PACKER_CMD="$PACKER_CMD -var source_image_name=$SOURCE_IMAGE_NAME" + else + PACKER_CMD="$PACKER_CMD -var image_name=$IMAGE_NAME" + fi + + eval $PACKER_CMD env: PKR_VAR_os_version: ${{ matrix.os_version }} diff --git a/.github/workflows/imagebuild.yml b/.github/workflows/imagebuild.yml deleted file mode 100644 index 7502c2201..000000000 --- a/.github/workflows/imagebuild.yml +++ /dev/null @@ -1,126 +0,0 @@ - -name: Build new image -on: - workflow_dispatch: -jobs: - openstack: - name: openstack-imagebuild - concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build - cancel-in-progress: true - runs-on: ubuntu-22.04 - strategy: - fail-fast: false # allow other matrix jobs to continue even if one fails - matrix: # build RL8+OFED, RL9+OFED, RL9+OFED+CUDA versions - os_version: - - RL8 - - RL9 - build: - - openstack.openhpc-ofed - - openstack.openhpc-cuda - exclude: - - os_version: RL8 - build: openstack.openhpc-cuda - env: - ANSIBLE_FORCE_COLOR: True - OS_CLOUD: openstack - CI_CLOUD: ${{ vars.CI_CLOUD }} - steps: - - uses: actions/checkout@v2 - - - name: Setup ssh - run: | - set -x - mkdir ~/.ssh - echo "${{ secrets[format('{0}_SSH_KEY', vars.CI_CLOUD)] }}" > ~/.ssh/id_rsa - chmod 0600 ~/.ssh/id_rsa - shell: bash - - - name: Add bastion's ssh key to known_hosts - run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts - shell: bash - - - name: Install ansible etc - run: dev/setup-env.sh - - - name: Write clouds.yaml - run: | - mkdir -p ~/.config/openstack/ - echo "${{ secrets[format('{0}_CLOUDS_YAML', vars.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml - shell: bash - - - name: Setup environment - run: | - . venv/bin/activate - . environments/.stackhpc/activate - - - name: Build fat image with packer - id: packer_build - run: | - . venv/bin/activate - . environments/.stackhpc/activate - cd packer/ - packer init . - PACKER_LOG=1 packer build -on-error=${{ vars.PACKER_ON_ERROR }} -only=${{ matrix.build }} -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl openstack.pkr.hcl - env: - PKR_VAR_os_version: ${{ matrix.os_version }} - - - name: Get created image names from manifest - id: manifest - run: | - . venv/bin/activate - IMAGE_ID=$(jq --raw-output '.builds[-1].artifact_id' packer/packer-manifest.json) - while ! openstack image show -f value -c name $IMAGE_ID; do - sleep 5 - done - IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID) - echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" - echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" - - - name: Download image - run: | - . venv/bin/activate - sudo mkdir /mnt/images - sudo chmod 777 /mnt/images - openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: install libguestfs - run: | - sudo apt -y update - sudo apt -y install libguestfs-tools - - - name: mkdir for mount - run: sudo mkdir -p './${{ steps.manifest.outputs.image-name }}' - - - name: mount qcow2 file - run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}' - - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@0.17.0 - with: - scan-type: fs - scan-ref: "${{ steps.manifest.outputs.image-name }}" - scanners: "vuln" - format: sarif - output: "${{ steps.manifest.outputs.image-name }}.sarif" - # turn off secret scanning to speed things up - - - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: "${{ steps.manifest.outputs.image-name }}.sarif" - category: "${{ matrix.os_version }}-${{ matrix.build }}" - - - name: Fail if scan has CRITICAL vulnerabilities - uses: aquasecurity/trivy-action@0.16.1 - with: - scan-type: fs - scan-ref: "${{ steps.manifest.outputs.image-name }}" - scanners: "vuln" - format: table - exit-code: '1' - severity: 'CRITICAL' - ignore-unfixed: true diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index 0ec27f8e0..5f66c0320 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -186,7 +186,7 @@ source "openstack" "openhpc" { # Output image: image_disk_format = "qcow2" image_visibility = var.image_visibility - image_name = "${source.name}-${var.os_version}-latest" + image_name = "${source.name}-${var.os_version}-${local.timestamp}-${substr(local.git_commit, 0, 8)}" } build { From ce2afdbea80e16ef5891ed484b6e9e490c315f90 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 17 Sep 2024 10:08:35 +0000 Subject: [PATCH 04/47] remove openstack. prefix from image name --- .github/workflows/fatimage.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index e612d40f3..91b3b589f 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -1,5 +1,5 @@ -name: Build nightly fat image +name: Build fat image on: workflow_dispatch: schedule: @@ -65,8 +65,9 @@ jobs: cd packer/ packer init . - SOURCE_IMAGE_NAME="${{ matrix.build }}-${{ matrix.os_version }}-latest" - IMAGE_NAME="${{ matrix.build }}-${{ matrix.os_version }}-latest" + MATRIX_BUILD="${{ matrix.build }}" + SOURCE_IMAGE_NAME="${MATRIX_BUILD#openstack.}-${{ matrix.os_version }}-latest" + IMAGE_NAME="${MATRIX_BUILD#openstack.}-${{ matrix.os_version }}-latest" PACKER_CMD="PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ From edbaeb74646e46d33b049d1940eca78535757688 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 17 Sep 2024 10:48:50 +0000 Subject: [PATCH 05/47] echo image name --- .github/workflows/fatimage.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 91b3b589f..326e35469 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -60,25 +60,28 @@ jobs: - name: Build fat image with packer id: packer_build run: | + set -x . venv/bin/activate . environments/.stackhpc/activate cd packer/ packer init . MATRIX_BUILD="${{ matrix.build }}" - SOURCE_IMAGE_NAME="${MATRIX_BUILD#openstack.}-${{ matrix.os_version }}-latest" - IMAGE_NAME="${MATRIX_BUILD#openstack.}-${{ matrix.os_version }}-latest" + CLEAN_BUILD="${MATRIX_BUILD#openstack.}" + SOURCE_IMAGE_NAME="${CLEAN_BUILD}-${{ matrix.os_version }}-latest" + IMAGE_NAME="${CLEAN_BUILD}-${{ matrix.os_version }}-latest" PACKER_CMD="PACKER_LOG=1 packer build \ + -debug \ -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl \ openstack.pkr.hcl" if [ "${{ github.event_name }}" != "schedule" ]; then - PACKER_CMD="$PACKER_CMD -var source_image_name=$SOURCE_IMAGE_NAME" + PACKER_CMD="$PACKER_CMD -var 'source_image_name=$SOURCE_IMAGE_NAME'" else - PACKER_CMD="$PACKER_CMD -var image_name=$IMAGE_NAME" + PACKER_CMD="$PACKER_CMD -var 'image_name=$IMAGE_NAME'" fi eval $PACKER_CMD From 47391add93e8db082007caa924718daf566e4619 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 17 Sep 2024 12:57:50 +0000 Subject: [PATCH 06/47] make image_name var in packer config --- .github/workflows/fatimage.yml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 326e35469..d430459c4 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -68,7 +68,11 @@ jobs: MATRIX_BUILD="${{ matrix.build }}" CLEAN_BUILD="${MATRIX_BUILD#openstack.}" - SOURCE_IMAGE_NAME="${CLEAN_BUILD}-${{ matrix.os_version }}-latest" + # SOURCE_IMAGE_NAME="${CLEAN_BUILD}-${{ matrix.os_version }}-latest" + SOURCE_IMAGE_NAME="{ + RL8: "${CLEAN_BUILD}-${{ matrix.os_version }}-latest" + RL9: "${CLEAN_BUILD}-${{ matrix.os_version }}-latest" + }" IMAGE_NAME="${CLEAN_BUILD}-${{ matrix.os_version }}-latest" PACKER_CMD="PACKER_LOG=1 packer build \ @@ -76,13 +80,14 @@ jobs: -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl \ + -var "source_image_name=${SOURCE_IMAGE_NAME}" \ openstack.pkr.hcl" - if [ "${{ github.event_name }}" != "schedule" ]; then - PACKER_CMD="$PACKER_CMD -var 'source_image_name=$SOURCE_IMAGE_NAME'" - else - PACKER_CMD="$PACKER_CMD -var 'image_name=$IMAGE_NAME'" - fi + # if [ "${{ github.event_name }}" != "schedule" ]; then + # PACKER_CMD="$PACKER_CMD -var 'source_image_name[${{ matrix.os_version }}]=${SOURCE_IMAGE_NAME}'" + # else + # PACKER_CMD="$PACKER_CMD -var 'image_name=$IMAGE_NAME'" + # fi eval $PACKER_CMD env: From 92e6d1c72bbbe900271ba79d81750be6b3cac53f Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 18 Sep 2024 12:32:27 +0000 Subject: [PATCH 07/47] new changes --- .github/workflows/fatimage-cron.yml | 130 ++++++++++++++++++++++++++++ .github/workflows/fatimage.yml | 23 +---- packer/openstack.pkr.hcl | 12 +-- 3 files changed, 138 insertions(+), 27 deletions(-) create mode 100644 .github/workflows/fatimage-cron.yml diff --git a/.github/workflows/fatimage-cron.yml b/.github/workflows/fatimage-cron.yml new file mode 100644 index 000000000..60537a634 --- /dev/null +++ b/.github/workflows/fatimage-cron.yml @@ -0,0 +1,130 @@ +name: Nightly fat image build +on: + workflow_dispatch: + schedule: + - cron: '0 0 * * *' # Run at midnight + +jobs: + openstack: + name: openstack-imagebuild + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build + cancel-in-progress: true + runs-on: ubuntu-22.04 + strategy: + fail-fast: false # allow other matrix jobs to continue even if one fails + matrix: # build RL8+OFED, RL9+OFED base images + os_version: + - RL8 + - RL9 + build: + - openstack.openhpc-latest + env: + ANSIBLE_FORCE_COLOR: True + OS_CLOUD: openstack + CI_CLOUD: ${{ vars.CI_CLOUD }} + steps: + - uses: actions/checkout@v2 + + - name: Setup ssh + run: | + set -x + mkdir ~/.ssh + echo "${{ secrets[format('{0}_SSH_KEY', vars.CI_CLOUD)] }}" > ~/.ssh/id_rsa + chmod 0600 ~/.ssh/id_rsa + shell: bash + + - name: Add bastion's ssh key to known_hosts + run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts + shell: bash + + - name: Install ansible etc + run: dev/setup-env.sh + + - name: Write clouds.yaml + run: | + mkdir -p ~/.config/openstack/ + echo "${{ secrets[format('{0}_CLOUDS_YAML', vars.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml + shell: bash + + - name: Setup environment + run: | + . venv/bin/activate + . environments/.stackhpc/activate + + - name: Build fat image with packer + id: packer_build + run: | + set -x + . venv/bin/activate + . environments/.stackhpc/activate + cd packer/ + packer init . + + PACKER_LOG=1 packer build \ + -on-error=${{ vars.PACKER_ON_ERROR }} \ + -only=${{ matrix.build }} \ + -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl \ + openstack.pkr.hcl + env: + PKR_VAR_os_version: ${{ matrix.os_version }} + + - name: Get created image names from manifest + id: manifest + run: | + . venv/bin/activate + IMAGE_ID=$(jq --raw-output '.builds[-1].artifact_id' packer/packer-manifest.json) + while ! openstack image show -f value -c name $IMAGE_ID; do + sleep 5 + done + IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID) + echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" + echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" + + - name: Download image + run: | + . venv/bin/activate + sudo mkdir /mnt/images + sudo chmod 777 /mnt/images + openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: install libguestfs + run: | + sudo apt -y update + sudo apt -y install libguestfs-tools + + - name: mkdir for mount + run: sudo mkdir -p './${{ steps.manifest.outputs.image-name }}' + + - name: mount qcow2 file + run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}' + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@0.17.0 + with: + scan-type: fs + scan-ref: "${{ steps.manifest.outputs.image-name }}" + scanners: "vuln" + format: sarif + output: "${{ steps.manifest.outputs.image-name }}.sarif" + # turn off secret scanning to speed things up + + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: "${{ steps.manifest.outputs.image-name }}.sarif" + category: "${{ matrix.os_version }}-${{ matrix.build }}" + + - name: Fail if scan has CRITICAL vulnerabilities + uses: aquasecurity/trivy-action@0.16.1 + with: + scan-type: fs + scan-ref: "${{ steps.manifest.outputs.image-name }}" + scanners: "vuln" + format: table + exit-code: '1' + severity: 'CRITICAL' + ignore-unfixed: true diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index d430459c4..ddb962636 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -66,30 +66,11 @@ jobs: cd packer/ packer init . - MATRIX_BUILD="${{ matrix.build }}" - CLEAN_BUILD="${MATRIX_BUILD#openstack.}" - # SOURCE_IMAGE_NAME="${CLEAN_BUILD}-${{ matrix.os_version }}-latest" - SOURCE_IMAGE_NAME="{ - RL8: "${CLEAN_BUILD}-${{ matrix.os_version }}-latest" - RL9: "${CLEAN_BUILD}-${{ matrix.os_version }}-latest" - }" - IMAGE_NAME="${CLEAN_BUILD}-${{ matrix.os_version }}-latest" - - PACKER_CMD="PACKER_LOG=1 packer build \ - -debug \ + PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl \ - -var "source_image_name=${SOURCE_IMAGE_NAME}" \ - openstack.pkr.hcl" - - # if [ "${{ github.event_name }}" != "schedule" ]; then - # PACKER_CMD="$PACKER_CMD -var 'source_image_name[${{ matrix.os_version }}]=${SOURCE_IMAGE_NAME}'" - # else - # PACKER_CMD="$PACKER_CMD -var 'image_name=$IMAGE_NAME'" - # fi - - eval $PACKER_CMD + openstack.pkr.hcl env: PKR_VAR_os_version: ${{ matrix.os_version }} diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index 5f66c0320..b6cebc387 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -132,7 +132,7 @@ variable "volume_size" { type = map(number) default = { # fat image builds, GB: - openhpc = 15 + openhpc-latest = 15 openhpc-ofed = 15 openhpc-cuda = 30 } @@ -153,9 +153,9 @@ variable "groups" { description = "Additional inventory groups (other than 'builder') to add build VM to, keyed by source name" default = { # fat image builds: - openhpc = ["control", "compute", "login"] - openhpc-ofed = ["control", "compute", "login", "ofed"] - openhpc-cuda = ["control", "compute", "login", "ofed", "cuda"] + openhpc-latest = ["ofed"] + openhpc-ofed = ["control", "compute", "login"] + openhpc-cuda = ["control", "compute", "login", "cuda"] } } @@ -191,9 +191,9 @@ source "openstack" "openhpc" { build { - # non-OFED fat image: + # latest fat image: source "source.openstack.openhpc" { - name = "openhpc" + name = "openhpc-latest" } # OFED fat image: From ac08548b14d83ffeec62bbe7f54d0ba0b9d27a3f Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 18 Sep 2024 12:44:39 +0000 Subject: [PATCH 08/47] fix merge changes --- .github/workflows/fatimage-cron.yml | 9 +++++++++ .github/workflows/fatimage.yml | 2 -- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/fatimage-cron.yml b/.github/workflows/fatimage-cron.yml index 60537a634..249b3d2bd 100644 --- a/.github/workflows/fatimage-cron.yml +++ b/.github/workflows/fatimage-cron.yml @@ -1,6 +1,15 @@ name: Nightly fat image build on: workflow_dispatch: + inputs: + ci_cloud: + description: 'Select the CI_CLOUD' + required: true + type: choice + options: + - LEAFCLOUD + - SMS + - ARCUS schedule: - cron: '0 0 * * *' # Run at midnight diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index b5381127d..2145c7d66 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -11,8 +11,6 @@ on: - LEAFCLOUD - SMS - ARCUS - schedule: - - cron: '0 0 * * *' # Run at midnight jobs: openstack: From 101f696632e2dc30bbef568738b2506b5dc8105c Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 18 Sep 2024 12:55:42 +0000 Subject: [PATCH 09/47] temp workflow changes --- .github/workflows/fatimage-cron.yml | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/workflows/fatimage-cron.yml b/.github/workflows/fatimage-cron.yml index 249b3d2bd..cb8a61fc8 100644 --- a/.github/workflows/fatimage-cron.yml +++ b/.github/workflows/fatimage-cron.yml @@ -22,38 +22,43 @@ jobs: runs-on: ubuntu-22.04 strategy: fail-fast: false # allow other matrix jobs to continue even if one fails - matrix: # build RL8+OFED, RL9+OFED base images + matrix: # build RL8+OFED, RL9+OFED new base images os_version: - RL8 - RL9 build: - - openstack.openhpc-latest + - openstack.openhpc-ofed + env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack - CI_CLOUD: ${{ vars.CI_CLOUD }} + CI_CLOUD: ${{ github.event.inputs.ci_cloud }} steps: - uses: actions/checkout@v2 + - name: Record settings for CI cloud + run: | + echo CI_CLOUD: ${{ env.CI_CLOUD }} + - name: Setup ssh run: | set -x mkdir ~/.ssh - echo "${{ secrets[format('{0}_SSH_KEY', vars.CI_CLOUD)] }}" > ~/.ssh/id_rsa + echo "${{ secrets[format('{0}_SSH_KEY', env.CI_CLOUD)] }}" > ~/.ssh/id_rsa chmod 0600 ~/.ssh/id_rsa shell: bash - name: Add bastion's ssh key to known_hosts run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts shell: bash - + - name: Install ansible etc run: dev/setup-env.sh - + - name: Write clouds.yaml run: | mkdir -p ~/.config/openstack/ - echo "${{ secrets[format('{0}_CLOUDS_YAML', vars.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml + echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml shell: bash - name: Setup environment From cf8a68526b8700c9fcbefc34423375275db332a3 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 18 Sep 2024 13:10:10 +0000 Subject: [PATCH 10/47] test nightly build --- .github/workflows/fatimage.yml | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 2145c7d66..6bf7a6fc1 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -1,5 +1,4 @@ - -name: Build fat image +name: Nightly fat image build on: workflow_dispatch: inputs: @@ -11,6 +10,8 @@ on: - LEAFCLOUD - SMS - ARCUS + schedule: + - cron: '0 0 * * *' # Run at midnight jobs: openstack: @@ -21,16 +22,13 @@ jobs: runs-on: ubuntu-22.04 strategy: fail-fast: false # allow other matrix jobs to continue even if one fails - matrix: # build RL8+OFED, RL9+OFED, RL9+OFED+CUDA versions + matrix: # build RL8+OFED, RL9+OFED new base images os_version: - RL8 - RL9 build: - - openstack.openhpc-ofed - - openstack.openhpc-cuda - exclude: - - os_version: RL8 - build: openstack.openhpc-cuda + - openstack.openhpc-latest + env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack @@ -53,10 +51,10 @@ jobs: - name: Add bastion's ssh key to known_hosts run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts shell: bash - + - name: Install ansible etc run: dev/setup-env.sh - + - name: Write clouds.yaml run: | mkdir -p ~/.config/openstack/ @@ -67,7 +65,7 @@ jobs: run: | . venv/bin/activate . environments/.stackhpc/activate - + - name: Build fat image with packer id: packer_build run: | @@ -81,6 +79,7 @@ jobs: -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl \ + -var 'source_image_name={"RL8": "openhpc-latest-RL8", "RL9": "openhpc-latest-RL9"}' \ openstack.pkr.hcl env: PKR_VAR_os_version: ${{ matrix.os_version }} From 26cd493b4b11499faa676826eb419acf54cec3bb Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 18 Sep 2024 14:33:18 +0000 Subject: [PATCH 11/47] change back fatimage workflow --- .github/workflows/fatimage.yml | 7 ++----- packer/openstack.pkr.hcl | 3 ++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 6bf7a6fc1..c8aa7aa23 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -1,4 +1,4 @@ -name: Nightly fat image build +name: Build fat image on: workflow_dispatch: inputs: @@ -10,8 +10,6 @@ on: - LEAFCLOUD - SMS - ARCUS - schedule: - - cron: '0 0 * * *' # Run at midnight jobs: openstack: @@ -22,7 +20,7 @@ jobs: runs-on: ubuntu-22.04 strategy: fail-fast: false # allow other matrix jobs to continue even if one fails - matrix: # build RL8+OFED, RL9+OFED new base images + matrix: # build RL8+OFED, RL9+OFED, RL9+OFED+CUDA versions os_version: - RL8 - RL9 @@ -79,7 +77,6 @@ jobs: -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl \ - -var 'source_image_name={"RL8": "openhpc-latest-RL8", "RL9": "openhpc-latest-RL9"}' \ openstack.pkr.hcl env: PKR_VAR_os_version: ${{ matrix.os_version }} diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index b6cebc387..c9b052f48 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -186,7 +186,8 @@ source "openstack" "openhpc" { # Output image: image_disk_format = "qcow2" image_visibility = var.image_visibility - image_name = "${source.name}-${var.os_version}-${local.timestamp}-${substr(local.git_commit, 0, 8)}" + image_name = "${source.name}-${var.os_version}" + # -${local.timestamp}-${substr(local.git_commit, 0, 8)}" } build { From cf37a32cfd94aabfce8b3ca56f84be1d0c1387e3 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 19 Sep 2024 10:26:33 +0000 Subject: [PATCH 12/47] rename images built --- .github/workflows/fatimage.yml | 2 +- packer/openstack.pkr.hcl | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index c8aa7aa23..75e2a593e 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -25,7 +25,7 @@ jobs: - RL8 - RL9 build: - - openstack.openhpc-latest + - openstack.rocky-latest env: ANSIBLE_FORCE_COLOR: True diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index c9b052f48..0c166856d 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -132,8 +132,8 @@ variable "volume_size" { type = map(number) default = { # fat image builds, GB: - openhpc-latest = 15 - openhpc-ofed = 15 + rocky-latest = 15 + openhpc = 15 openhpc-cuda = 30 } } @@ -153,8 +153,8 @@ variable "groups" { description = "Additional inventory groups (other than 'builder') to add build VM to, keyed by source name" default = { # fat image builds: - openhpc-latest = ["ofed"] - openhpc-ofed = ["control", "compute", "login"] + rocky-latest = ["ofed"] + openhpc = ["control", "compute", "login"] openhpc-cuda = ["control", "compute", "login", "cuda"] } } @@ -194,12 +194,12 @@ build { # latest fat image: source "source.openstack.openhpc" { - name = "openhpc-latest" + name = "rocky-latest" } # OFED fat image: source "source.openstack.openhpc" { - name = "openhpc-ofed" + name = "openhpc" } # CUDA fat image: From c21d097f65f0dd35b20d89a4d679fad8f2527e33 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 19 Sep 2024 12:45:26 +0000 Subject: [PATCH 13/47] add update to builder group --- environments/common/layouts/everything | 1 + 1 file changed, 1 insertion(+) diff --git a/environments/common/layouts/everything b/environments/common/layouts/everything index 85af46c06..317c140f8 100644 --- a/environments/common/layouts/everything +++ b/environments/common/layouts/everything @@ -29,6 +29,7 @@ slurm_stats [update:children] cluster +builder [fail2ban:children] # Hosts to install fail2ban on to protect SSH From 64918cb312e854c97e29410866a76db244b1d7a9 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 19 Sep 2024 13:53:07 +0000 Subject: [PATCH 14/47] add update to fatimage build groups --- environments/common/layouts/everything | 2 -- packer/openstack.pkr.hcl | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/environments/common/layouts/everything b/environments/common/layouts/everything index 317c140f8..205f1d334 100644 --- a/environments/common/layouts/everything +++ b/environments/common/layouts/everything @@ -28,8 +28,6 @@ slurm_stats # NB: [rebuild] not defined here as this template is used in CI [update:children] -cluster -builder [fail2ban:children] # Hosts to install fail2ban on to protect SSH diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index 0c166856d..084f747d2 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -153,7 +153,7 @@ variable "groups" { description = "Additional inventory groups (other than 'builder') to add build VM to, keyed by source name" default = { # fat image builds: - rocky-latest = ["ofed"] + rocky-latest = ["update", "ofed"] openhpc = ["control", "compute", "login"] openhpc-cuda = ["control", "compute", "login", "cuda"] } From 8e71d6a6fc4e932a35258269e7d779ba18121403 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 19 Sep 2024 15:36:05 +0000 Subject: [PATCH 15/47] fatimage.yml fix --- ansible/fatimage.yml | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/ansible/fatimage.yml b/ansible/fatimage.yml index 58e1d72c7..8bbd50262 100644 --- a/ansible/fatimage.yml +++ b/ansible/fatimage.yml @@ -56,10 +56,12 @@ include_role: name: mysql tasks_from: install.yml + when: "'mysql' in group_names" - name: OpenHPC import_role: name: stackhpc.openhpc tasks_from: install.yml + when: "'openhpc' in group_names" # - import_playbook: portal.yml - name: Open Ondemand server (packages) @@ -67,6 +69,7 @@ name: osc.ood tasks_from: install-package.yml vars_from: "Rocky/{{ ansible_distribution_major_version }}.yml" + when: "'openondemand' in group_names" # # FUTURE: install-apps.yml - this is git clones - name: Open Ondemand server (apps) @@ -74,34 +77,40 @@ name: osc.ood tasks_from: install-apps.yml vars_from: "Rocky/{{ ansible_distribution_major_version }}.yml" + when: "'openondemand' in group_names" - name: Open Ondemand remote desktop import_role: name: openondemand tasks_from: vnc_compute.yml + when: "'openondemand_desktop' in group_names" - name: Open Ondemand jupyter node import_role: name: openondemand tasks_from: jupyter_compute.yml + when: "'openondemand' in group_names" # - import_playbook: monitoring.yml: - import_role: name: opensearch tasks_from: install.yml - become: true + when: "'opensearch' in group_names" # slurm_stats - nothing to do - import_role: name: filebeat tasks_from: install.yml + when: "'filebeat' in group_names" - import_role: # can't only run cloudalchemy.node_exporter/tasks/install.yml as needs vars from preflight.yml and triggers service start # however starting node exporter is ok name: cloudalchemy.node_exporter + when: "'node_exporter' in group_names" - name: openondemand exporter dnf: - name: ondemand_exporter + name: ondemand_exporter + when: "'openondemand' in group_names" - name: slurm exporter import_role: @@ -109,7 +118,12 @@ tasks_from: install vars: slurm_exporter_state: stopped + when: "'slurm_exporter' in group_names" +- hosts: prometheus + become: yes + gather_facts: yes + tasks: - import_role: name: cloudalchemy.prometheus tasks_from: preflight.yml @@ -162,6 +176,10 @@ - prometheus - promtool +- hosts: grafana + become: yes + gather_facts: yes + tasks: - name: Include distribution variables for cloudalchemy.grafana include_vars: "{{ appliances_repository_root }}/ansible/roles/cloudalchemy.grafana/vars/redhat.yml" - import_role: From 990ddc12a528c0c7fe9cece343e1992760839e34 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 20 Sep 2024 09:19:22 +0000 Subject: [PATCH 16/47] move output image_name declaration into build blocks --- .github/workflows/fatimage-cron.yml | 144 ---------------------------- packer/openstack.pkr.hcl | 6 +- 2 files changed, 4 insertions(+), 146 deletions(-) delete mode 100644 .github/workflows/fatimage-cron.yml diff --git a/.github/workflows/fatimage-cron.yml b/.github/workflows/fatimage-cron.yml deleted file mode 100644 index cb8a61fc8..000000000 --- a/.github/workflows/fatimage-cron.yml +++ /dev/null @@ -1,144 +0,0 @@ -name: Nightly fat image build -on: - workflow_dispatch: - inputs: - ci_cloud: - description: 'Select the CI_CLOUD' - required: true - type: choice - options: - - LEAFCLOUD - - SMS - - ARCUS - schedule: - - cron: '0 0 * * *' # Run at midnight - -jobs: - openstack: - name: openstack-imagebuild - concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build - cancel-in-progress: true - runs-on: ubuntu-22.04 - strategy: - fail-fast: false # allow other matrix jobs to continue even if one fails - matrix: # build RL8+OFED, RL9+OFED new base images - os_version: - - RL8 - - RL9 - build: - - openstack.openhpc-ofed - - env: - ANSIBLE_FORCE_COLOR: True - OS_CLOUD: openstack - CI_CLOUD: ${{ github.event.inputs.ci_cloud }} - steps: - - uses: actions/checkout@v2 - - - name: Record settings for CI cloud - run: | - echo CI_CLOUD: ${{ env.CI_CLOUD }} - - - name: Setup ssh - run: | - set -x - mkdir ~/.ssh - echo "${{ secrets[format('{0}_SSH_KEY', env.CI_CLOUD)] }}" > ~/.ssh/id_rsa - chmod 0600 ~/.ssh/id_rsa - shell: bash - - - name: Add bastion's ssh key to known_hosts - run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts - shell: bash - - - name: Install ansible etc - run: dev/setup-env.sh - - - name: Write clouds.yaml - run: | - mkdir -p ~/.config/openstack/ - echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml - shell: bash - - - name: Setup environment - run: | - . venv/bin/activate - . environments/.stackhpc/activate - - - name: Build fat image with packer - id: packer_build - run: | - set -x - . venv/bin/activate - . environments/.stackhpc/activate - cd packer/ - packer init . - - PACKER_LOG=1 packer build \ - -on-error=${{ vars.PACKER_ON_ERROR }} \ - -only=${{ matrix.build }} \ - -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl \ - openstack.pkr.hcl - env: - PKR_VAR_os_version: ${{ matrix.os_version }} - - - name: Get created image names from manifest - id: manifest - run: | - . venv/bin/activate - IMAGE_ID=$(jq --raw-output '.builds[-1].artifact_id' packer/packer-manifest.json) - while ! openstack image show -f value -c name $IMAGE_ID; do - sleep 5 - done - IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID) - echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" - echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" - - - name: Download image - run: | - . venv/bin/activate - sudo mkdir /mnt/images - sudo chmod 777 /mnt/images - openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: install libguestfs - run: | - sudo apt -y update - sudo apt -y install libguestfs-tools - - - name: mkdir for mount - run: sudo mkdir -p './${{ steps.manifest.outputs.image-name }}' - - - name: mount qcow2 file - run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}' - - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@0.17.0 - with: - scan-type: fs - scan-ref: "${{ steps.manifest.outputs.image-name }}" - scanners: "vuln" - format: sarif - output: "${{ steps.manifest.outputs.image-name }}.sarif" - # turn off secret scanning to speed things up - - - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: "${{ steps.manifest.outputs.image-name }}.sarif" - category: "${{ matrix.os_version }}-${{ matrix.build }}" - - - name: Fail if scan has CRITICAL vulnerabilities - uses: aquasecurity/trivy-action@0.16.1 - with: - scan-type: fs - scan-ref: "${{ steps.manifest.outputs.image-name }}" - scanners: "vuln" - format: table - exit-code: '1' - severity: 'CRITICAL' - ignore-unfixed: true diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index 084f747d2..cb17a4fd0 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -186,8 +186,7 @@ source "openstack" "openhpc" { # Output image: image_disk_format = "qcow2" image_visibility = var.image_visibility - image_name = "${source.name}-${var.os_version}" - # -${local.timestamp}-${substr(local.git_commit, 0, 8)}" + } build { @@ -195,16 +194,19 @@ build { # latest fat image: source "source.openstack.openhpc" { name = "rocky-latest" + image_name = "${source.name}-${var.os_version}" } # OFED fat image: source "source.openstack.openhpc" { name = "openhpc" + image_name = "${source.name}-${var.os_version}-${local.timestamp}-${substr(local.git_commit, 0, 8)}" } # CUDA fat image: source "source.openstack.openhpc" { name = "openhpc-cuda" + image_name = "${source.name}-${var.os_version}-${local.timestamp}-${substr(local.git_commit, 0, 8)}" } # Extended site-specific image, built on fat image: From aa0786fb130c49b3ea5ac3625654cf57079556d6 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 20 Sep 2024 15:37:36 +0000 Subject: [PATCH 17/47] delete outdated nightly image --- .github/workflows/fatimage.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 75e2a593e..d4bca3993 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -93,6 +93,17 @@ jobs: echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" + - name: Delete old latest image + run: | + . venv/bin/activate + IMAGE_COUNT=$(openstack image list --name ${{ steps.manifest.outputs.image-name }} -f value -c ID | wc -l) + if [ "$IMAGE_COUNT" -gt 1 ]; then + OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ steps.manifest.outputs.image-name }}" -f value -c ID | head -n 1) + openstack image delete "$OLD_IMAGE_ID" + else + echo "Only one image exists, skipping deletion." + fi + - name: Download image run: | . venv/bin/activate From 6defc9dcff1e15801f4318b92af9c4693d655d6d Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 20 Sep 2024 17:37:46 +0000 Subject: [PATCH 18/47] test new fatimage build --- .github/workflows/fatimage.yml | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index d4bca3993..1e5dba1da 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -25,7 +25,7 @@ jobs: - RL8 - RL9 build: - - openstack.rocky-latest + - openstack.openhpc env: ANSIBLE_FORCE_COLOR: True @@ -77,6 +77,7 @@ jobs: -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl \ + -var 'source_image_name={"${{ matrix.os_version }}":"rocky-latest-${{ matrix.os_version }}"}' \ openstack.pkr.hcl env: PKR_VAR_os_version: ${{ matrix.os_version }} @@ -93,17 +94,6 @@ jobs: echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" - - name: Delete old latest image - run: | - . venv/bin/activate - IMAGE_COUNT=$(openstack image list --name ${{ steps.manifest.outputs.image-name }} -f value -c ID | wc -l) - if [ "$IMAGE_COUNT" -gt 1 ]; then - OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ steps.manifest.outputs.image-name }}" -f value -c ID | head -n 1) - openstack image delete "$OLD_IMAGE_ID" - else - echo "Only one image exists, skipping deletion." - fi - - name: Download image run: | . venv/bin/activate From c29a9dbfe00e4eac007c7ebb0473be218c0d3cab Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 24 Sep 2024 09:30:20 +0000 Subject: [PATCH 19/47] debug dnf remove cockpit --- ansible/bootstrap.yml | 12 ++++++++---- dev/extract_logs.py | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/ansible/bootstrap.yml b/ansible/bootstrap.yml index c43d614db..bbae37cbb 100644 --- a/ansible/bootstrap.yml +++ b/ansible/bootstrap.yml @@ -147,10 +147,14 @@ become: yes tags: cockpit tasks: - - name: Remove RHEL cockpit - dnf: - name: cockpit-ws - state: "{{ appliances_cockpit_state }}" + - name: Remove RHEL cockpit using dnf command + command: dnf -y -vvvv remove cockpit-ws + register: dnf_remove_output + ignore_errors: true # Avoid failing if a lock or other error happens + + - name: Debug dnf output + debug: + msg: "{{ dnf_remove_output.stdout }}" - hosts: firewalld gather_facts: false diff --git a/dev/extract_logs.py b/dev/extract_logs.py index 91923f1a0..65df0140e 100644 --- a/dev/extract_logs.py +++ b/dev/extract_logs.py @@ -76,7 +76,7 @@ def extract_log_info_and_generate_csv(log_file_path, output_csv_path, target_dir print("Path to workflow log plain text file should be provided as the only arg to this script") sys.exit(1) log_file_path = sys.argv[1] # Input workflow log name -output_csv_path = log_file_path.replace('.txt.', '.csv') # Output CSV name +output_csv_path = log_file_path.replace('.txt', '.csv') # Output CSV name target_directory = '/ansible/' # Shared directory for task path extract_log_info_and_generate_csv(log_file_path, output_csv_path, target_directory) From 3b601f5da247431b8789e8674635b88bc0d29137 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 24 Sep 2024 11:13:38 +0000 Subject: [PATCH 20/47] --amend --- ansible/bootstrap.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/ansible/bootstrap.yml b/ansible/bootstrap.yml index bbae37cbb..9a7b991dc 100644 --- a/ansible/bootstrap.yml +++ b/ansible/bootstrap.yml @@ -147,15 +147,11 @@ become: yes tags: cockpit tasks: - - name: Remove RHEL cockpit using dnf command - command: dnf -y -vvvv remove cockpit-ws + - name: Remove RHEL cockpit + command: dnf -y remove cockpit-ws register: dnf_remove_output ignore_errors: true # Avoid failing if a lock or other error happens - - name: Debug dnf output - debug: - msg: "{{ dnf_remove_output.stdout }}" - - hosts: firewalld gather_facts: false become: yes From f3d0b67fb5d21e3c02c1927ac52ceaba558f2669 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 24 Sep 2024 13:32:06 +0000 Subject: [PATCH 21/47] add cuda build back in --- .github/workflows/fatimage.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 1e5dba1da..3fbea616f 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -26,6 +26,10 @@ jobs: - RL9 build: - openstack.openhpc + - openstack.openhpc-cuda + exclude: + - os_version: RL8 + build: openstack.openhpc-cuda env: ANSIBLE_FORCE_COLOR: True From 35048ac15cc0ce7bc90057fe0967a38914b12505 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 26 Sep 2024 08:45:20 +0000 Subject: [PATCH 22/47] cuda nightly build --- .github/workflows/fatimage.yml | 8 +++++++- packer/openstack.pkr.hcl | 13 +++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 3fbea616f..ac74727ef 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -77,11 +77,17 @@ jobs: cd packer/ packer init . + if [[ "${{ matrix.build }}" == *"cuda"* ]]; then + SOURCE_IMAGE_NAME="rocky-latest-cuda" + else + SOURCE_IMAGE_NAME="rocky-latest" + fi + PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl \ - -var 'source_image_name={"${{ matrix.os_version }}":"rocky-latest-${{ matrix.os_version }}"}' \ + -var "source_image_name={\"${{ matrix.os_version }}\":\"${SOURCE_IMAGE_NAME}-${{ matrix.os_version }}\"}" \ openstack.pkr.hcl env: PKR_VAR_os_version: ${{ matrix.os_version }} diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index cb17a4fd0..3f41ff041 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -133,6 +133,7 @@ variable "volume_size" { default = { # fat image builds, GB: rocky-latest = 15 + rocky-latest-cuda = 30 openhpc = 15 openhpc-cuda = 30 } @@ -154,8 +155,9 @@ variable "groups" { default = { # fat image builds: rocky-latest = ["update", "ofed"] + rocky-latest-cuda = ["update", "ofed", "cuda"] openhpc = ["control", "compute", "login"] - openhpc-cuda = ["control", "compute", "login", "cuda"] + openhpc-cuda = ["control", "compute", "login"] } } @@ -191,12 +193,18 @@ source "openstack" "openhpc" { build { - # latest fat image: + # latest nightly image: source "source.openstack.openhpc" { name = "rocky-latest" image_name = "${source.name}-${var.os_version}" } + # latest nightly cuda image: + source "source.openstack.openhpc" { + name = "rocky-latest-cuda" + image_name = "${source.name}-${var.os_version}" + } + # OFED fat image: source "source.openstack.openhpc" { name = "openhpc" @@ -212,6 +220,7 @@ build { # Extended site-specific image, built on fat image: source "source.openstack.openhpc" { name = "openhpc-extra" + image_name = "${source.name}-${var.os_version}-${local.timestamp}-${substr(local.git_commit, 0, 8)}" } provisioner "ansible" { From d16ef507a96fe43d7653fa14d2ec1c425b05cf0b Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 26 Sep 2024 09:55:21 +0000 Subject: [PATCH 23/47] test cuda nightly builds --- .github/workflows/fatimage.yml | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index ac74727ef..502acf1e7 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -10,6 +10,8 @@ on: - LEAFCLOUD - SMS - ARCUS + schedule: + - cron: '0 0 * * *' # Run at midnight jobs: openstack: @@ -25,16 +27,16 @@ jobs: - RL8 - RL9 build: - - openstack.openhpc - - openstack.openhpc-cuda + - openstack.rocky-latest + - openstack.rocky-latest-cuda exclude: - os_version: RL8 - build: openstack.openhpc-cuda + build: openstack.rocky-latest-cuda env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack - CI_CLOUD: ${{ github.event.inputs.ci_cloud }} + CI_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }} steps: - uses: actions/checkout@v2 @@ -77,18 +79,12 @@ jobs: cd packer/ packer init . - if [[ "${{ matrix.build }}" == *"cuda"* ]]; then - SOURCE_IMAGE_NAME="rocky-latest-cuda" - else - SOURCE_IMAGE_NAME="rocky-latest" - fi - PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ - -var-file=$PKR_VAR_environment_root/${{ vars.CI_CLOUD }}.pkrvars.hcl \ - -var "source_image_name={\"${{ matrix.os_version }}\":\"${SOURCE_IMAGE_NAME}-${{ matrix.os_version }}\"}" \ + -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ openstack.pkr.hcl + env: PKR_VAR_os_version: ${{ matrix.os_version }} @@ -104,11 +100,23 @@ jobs: echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" + - name: Delete old latest image + run: | + . venv/bin/activate + IMAGE_COUNT=$(openstack image list --name ${{ steps.manifest.outputs.image-name }} -f value -c ID | wc -l) + if [ "$IMAGE_COUNT" -gt 1 ]; then + OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ steps.manifest.outputs.image-name }}" -f value -c ID | head -n 1) + openstack image delete "$OLD_IMAGE_ID" + else + echo "Only one image exists, skipping deletion." + fi + - name: Download image run: | . venv/bin/activate sudo mkdir /mnt/images sudo chmod 777 /mnt/images + openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-name }}" openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} - name: Set up QEMU From b98d803f81647c3cb771c51e3356de28a7dd2001 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 27 Sep 2024 08:18:52 +0000 Subject: [PATCH 24/47] test new fatimage build on SMS --- .github/workflows/fatimage.yml | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 502acf1e7..0ec339b38 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -10,8 +10,6 @@ on: - LEAFCLOUD - SMS - ARCUS - schedule: - - cron: '0 0 * * *' # Run at midnight jobs: openstack: @@ -27,16 +25,16 @@ jobs: - RL8 - RL9 build: - - openstack.rocky-latest - - openstack.rocky-latest-cuda + - openstack.openhpc + - openstack.openhpc-cuda exclude: - os_version: RL8 - build: openstack.rocky-latest-cuda + build: openstack.openhpc-cuda env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack - CI_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }} + CI_CLOUD: ${{ github.event.inputs.ci_cloud }} steps: - uses: actions/checkout@v2 @@ -79,12 +77,18 @@ jobs: cd packer/ packer init . + if [[ "${{ matrix.build }}" == *"cuda"* ]]; then + SOURCE_IMAGE_NAME="rocky-latest-cuda" + else + SOURCE_IMAGE_NAME="rocky-latest" + fi + PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ + -var "source_image_name={\"${{ matrix.os_version }}\":\"${SOURCE_IMAGE_NAME}-${{ matrix.os_version }}\"}" \ openstack.pkr.hcl - env: PKR_VAR_os_version: ${{ matrix.os_version }} @@ -100,23 +104,11 @@ jobs: echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" - - name: Delete old latest image - run: | - . venv/bin/activate - IMAGE_COUNT=$(openstack image list --name ${{ steps.manifest.outputs.image-name }} -f value -c ID | wc -l) - if [ "$IMAGE_COUNT" -gt 1 ]; then - OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ steps.manifest.outputs.image-name }}" -f value -c ID | head -n 1) - openstack image delete "$OLD_IMAGE_ID" - else - echo "Only one image exists, skipping deletion." - fi - - name: Download image run: | . venv/bin/activate sudo mkdir /mnt/images sudo chmod 777 /mnt/images - openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-name }}" openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} - name: Set up QEMU From 79f68934a0ea84b844b1c9f441c9145f0ef8c41b Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 27 Sep 2024 16:16:51 +0000 Subject: [PATCH 25/47] test image upploads across clouds --- .github/workflows/fatimage.yml | 117 +++++++++++++++++++++++++++++---- 1 file changed, 105 insertions(+), 12 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 0ec339b38..c4a88a6bb 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -1,4 +1,4 @@ -name: Build fat image +name: Build nightly image on: workflow_dispatch: inputs: @@ -10,6 +10,8 @@ on: - LEAFCLOUD - SMS - ARCUS + schedule: + - cron: '0 0 * * *' # Run at midnight jobs: openstack: @@ -25,16 +27,16 @@ jobs: - RL8 - RL9 build: - - openstack.openhpc - - openstack.openhpc-cuda + - openstack.rocky-latest + - openstack.rocky-latest-cuda exclude: - os_version: RL8 - build: openstack.openhpc-cuda + build: openstack.rocky-latest-cuda env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack - CI_CLOUD: ${{ github.event.inputs.ci_cloud }} + CI_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }} steps: - uses: actions/checkout@v2 @@ -77,18 +79,12 @@ jobs: cd packer/ packer init . - if [[ "${{ matrix.build }}" == *"cuda"* ]]; then - SOURCE_IMAGE_NAME="rocky-latest-cuda" - else - SOURCE_IMAGE_NAME="rocky-latest" - fi - PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ - -var "source_image_name={\"${{ matrix.os_version }}\":\"${SOURCE_IMAGE_NAME}-${{ matrix.os_version }}\"}" \ openstack.pkr.hcl + env: PKR_VAR_os_version: ${{ matrix.os_version }} @@ -104,11 +100,23 @@ jobs: echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" + - name: Delete old latest image + run: | + . venv/bin/activate + IMAGE_COUNT=$(openstack image list --name ${{ steps.manifest.outputs.image-name }} -f value -c ID | wc -l) + if [ "$IMAGE_COUNT" -gt 1 ]; then + OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ steps.manifest.outputs.image-name }}" -f value -c ID | head -n 1) + openstack image delete "$OLD_IMAGE_ID" + else + echo "Only one image exists, skipping deletion." + fi + - name: Download image run: | . venv/bin/activate sudo mkdir /mnt/images sudo chmod 777 /mnt/images + openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-name }}" openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} - name: Set up QEMU @@ -151,3 +159,88 @@ jobs: exit-code: '1' severity: 'CRITICAL' ignore-unfixed: true + + upload: + name: upload-nightly-targets + needs: openstack + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.image }}-${{ matrix.target_cloud }} + cancel-in-progress: true + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + target_cloud: + - LEAFCLOUD + - SMS + - ARCUS + os_version: + - RL8 + - RL9 + image: + - rocky-latest + - rocky-latest-cuda + exclude: + - os_version: RL8 + image: rocky-latest-cuda + - target_cloud: LEAFCLOUD + env: + OS_CLOUD: openstack + SOURCE_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }} + TARGET_CLOUD: ${{ matrix.target_cloud }} + IMAGE_NAME: "${{ matrix.image }}-${{ matrix.os_version }}" + steps: + - uses: actions/checkout@v2 + + - name: Record settings for CI cloud + run: | + echo SOURCE_CLOUD: ${{ env.SOURCE_CLOUD }} + echo TARGET_CLOUD: ${{ env.TARGET_CLOUD }} + + - name: Install openstackclient + run: | + python3 -m venv venv + . venv/bin/activate + pip install -U pip + pip install $(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt) + shell: bash + + - name: Write clouds.yaml + run: | + mkdir -p ~/.config/openstack/ + echo "${{ secrets[format('{0}_CLOUDS_YAML', env.SOURCE_CLOUD)] }}" > ~/.config/openstack/source_clouds.yaml + echo "${{ secrets[format('{0}_CLOUDS_YAML', env.TARGET_CLOUD)] }}" > ~/.config/openstack/target_clouds.yaml + shell: bash + + - name: Download source image + run: | + . venv/bin/activate + export OS_CLIENT_CONFIG_FILE=~/.config/openstack/source_clouds.yaml + openstack image save --file ${{ env.IMAGE_NAME }} ${{ env.IMAGE_NAME }} + shell: bash + + - name: Upload to target cloud + run: | + . venv/bin/activate + export OS_CLIENT_CONFIG_FILE=~/.config/openstack/target_clouds.yaml + + openstack image create "${{ env.IMAGE_NAME }}" \ + --file "${{ env.IMAGE_NAME }}" \ + --disk-format qcow2 \ + --container-format bare \ + --public + shell: bash + + - name: Delete old latest image from target cloud + run: | + . venv/bin/activate + export OS_CLIENT_CONFIG_FILE=~/.config/openstack/target_clouds.yaml + + IMAGE_COUNT=$(openstack image list --name ${{ env.IMAGE_NAME }} -f value -c ID | wc -l) + if [ "$IMAGE_COUNT" -gt 1 ]; then + OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ env.IMAGE_NAME }}" -f value -c ID | head -n 1) + openstack image delete "$OLD_IMAGE_ID" + else + echo "Only one image exists, skipping deletion." + fi + shell: bash From 7183fcc26581e2461e3d9d63f39ed26b7ee54bef Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Mon, 30 Sep 2024 11:58:09 +0000 Subject: [PATCH 26/47] test image uploads in separate workflow --- .github/workflows/fatimage.yml | 149 --------------------------------- 1 file changed, 149 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index c4a88a6bb..92541b122 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -14,155 +14,8 @@ on: - cron: '0 0 * * *' # Run at midnight jobs: - openstack: - name: openstack-imagebuild - concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build - cancel-in-progress: true - runs-on: ubuntu-22.04 - strategy: - fail-fast: false # allow other matrix jobs to continue even if one fails - matrix: # build RL8+OFED, RL9+OFED, RL9+OFED+CUDA versions - os_version: - - RL8 - - RL9 - build: - - openstack.rocky-latest - - openstack.rocky-latest-cuda - exclude: - - os_version: RL8 - build: openstack.rocky-latest-cuda - - env: - ANSIBLE_FORCE_COLOR: True - OS_CLOUD: openstack - CI_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }} - steps: - - uses: actions/checkout@v2 - - - name: Record settings for CI cloud - run: | - echo CI_CLOUD: ${{ env.CI_CLOUD }} - - - name: Setup ssh - run: | - set -x - mkdir ~/.ssh - echo "${{ secrets[format('{0}_SSH_KEY', env.CI_CLOUD)] }}" > ~/.ssh/id_rsa - chmod 0600 ~/.ssh/id_rsa - shell: bash - - - name: Add bastion's ssh key to known_hosts - run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts - shell: bash - - - name: Install ansible etc - run: dev/setup-env.sh - - - name: Write clouds.yaml - run: | - mkdir -p ~/.config/openstack/ - echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml - shell: bash - - - name: Setup environment - run: | - . venv/bin/activate - . environments/.stackhpc/activate - - - name: Build fat image with packer - id: packer_build - run: | - set -x - . venv/bin/activate - . environments/.stackhpc/activate - cd packer/ - packer init . - - PACKER_LOG=1 packer build \ - -on-error=${{ vars.PACKER_ON_ERROR }} \ - -only=${{ matrix.build }} \ - -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ - openstack.pkr.hcl - - env: - PKR_VAR_os_version: ${{ matrix.os_version }} - - - name: Get created image names from manifest - id: manifest - run: | - . venv/bin/activate - IMAGE_ID=$(jq --raw-output '.builds[-1].artifact_id' packer/packer-manifest.json) - while ! openstack image show -f value -c name $IMAGE_ID; do - sleep 5 - done - IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID) - echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" - echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" - - - name: Delete old latest image - run: | - . venv/bin/activate - IMAGE_COUNT=$(openstack image list --name ${{ steps.manifest.outputs.image-name }} -f value -c ID | wc -l) - if [ "$IMAGE_COUNT" -gt 1 ]; then - OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ steps.manifest.outputs.image-name }}" -f value -c ID | head -n 1) - openstack image delete "$OLD_IMAGE_ID" - else - echo "Only one image exists, skipping deletion." - fi - - - name: Download image - run: | - . venv/bin/activate - sudo mkdir /mnt/images - sudo chmod 777 /mnt/images - openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-name }}" - openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: install libguestfs - run: | - sudo apt -y update - sudo apt -y install libguestfs-tools - - - name: mkdir for mount - run: sudo mkdir -p './${{ steps.manifest.outputs.image-name }}' - - - name: mount qcow2 file - run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}' - - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@0.17.0 - with: - scan-type: fs - scan-ref: "${{ steps.manifest.outputs.image-name }}" - scanners: "vuln" - format: sarif - output: "${{ steps.manifest.outputs.image-name }}.sarif" - # turn off secret scanning to speed things up - - - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: "${{ steps.manifest.outputs.image-name }}.sarif" - category: "${{ matrix.os_version }}-${{ matrix.build }}" - - - name: Fail if scan has CRITICAL vulnerabilities - uses: aquasecurity/trivy-action@0.16.1 - with: - scan-type: fs - scan-ref: "${{ steps.manifest.outputs.image-name }}" - scanners: "vuln" - format: table - exit-code: '1' - severity: 'CRITICAL' - ignore-unfixed: true - upload: name: upload-nightly-targets - needs: openstack concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.image }}-${{ matrix.target_cloud }} cancel-in-progress: true @@ -227,8 +80,6 @@ jobs: openstack image create "${{ env.IMAGE_NAME }}" \ --file "${{ env.IMAGE_NAME }}" \ --disk-format qcow2 \ - --container-format bare \ - --public shell: bash - name: Delete old latest image from target cloud From 02fa0b666e9aeecfa09c3f7f41eccdbc4be84a95 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Mon, 30 Sep 2024 12:34:23 +0000 Subject: [PATCH 27/47] finish nightly build workflow --- .github/workflows/fatimage.yml | 149 +++++++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 92541b122..25b298f5a 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -14,8 +14,157 @@ on: - cron: '0 0 * * *' # Run at midnight jobs: + openstack: + name: openstack-imagebuild + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build + cancel-in-progress: true + runs-on: ubuntu-22.04 + strategy: + fail-fast: false # allow other matrix jobs to continue even if one fails + matrix: # build RL8+OFED, RL9+OFED, RL9+OFED+CUDA versions + os_version: + - RL8 + - RL9 + build: + - openstack.rocky-latest + - openstack.rocky-latest-cuda + exclude: + - os_version: RL8 + build: openstack.rocky-latest-cuda + + env: + ANSIBLE_FORCE_COLOR: True + OS_CLOUD: openstack + CI_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }} + steps: + - uses: actions/checkout@v2 + + - name: Record settings for CI cloud + run: | + echo CI_CLOUD: ${{ env.CI_CLOUD }} + + - name: Setup ssh + run: | + set -x + mkdir ~/.ssh + echo "${{ secrets[format('{0}_SSH_KEY', env.CI_CLOUD)] }}" > ~/.ssh/id_rsa + chmod 0600 ~/.ssh/id_rsa + shell: bash + + - name: Add bastion's ssh key to known_hosts + run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts + shell: bash + + - name: Install ansible etc + run: dev/setup-env.sh + + - name: Write clouds.yaml + run: | + mkdir -p ~/.config/openstack/ + echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml + shell: bash + + - name: Setup environment + run: | + . venv/bin/activate + . environments/.stackhpc/activate + + - name: Build fat image with packer + id: packer_build + run: | + set -x + . venv/bin/activate + . environments/.stackhpc/activate + cd packer/ + packer init . + + PACKER_LOG=1 packer build \ + -on-error=${{ vars.PACKER_ON_ERROR }} \ + -only=${{ matrix.build }} \ + -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ + openstack.pkr.hcl + + env: + PKR_VAR_os_version: ${{ matrix.os_version }} + + - name: Get created image names from manifest + id: manifest + run: | + . venv/bin/activate + IMAGE_ID=$(jq --raw-output '.builds[-1].artifact_id' packer/packer-manifest.json) + while ! openstack image show -f value -c name $IMAGE_ID; do + sleep 5 + done + IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID) + echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" + echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" + + - name: Delete old latest image + run: | + . venv/bin/activate + IMAGE_COUNT=$(openstack image list --name ${{ steps.manifest.outputs.image-name }} -f value -c ID | wc -l) + if [ "$IMAGE_COUNT" -gt 1 ]; then + OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ steps.manifest.outputs.image-name }}" -f value -c ID | head -n 1) + openstack image delete "$OLD_IMAGE_ID" + else + echo "Only one image exists, skipping deletion." + fi + + - name: Download image + run: | + . venv/bin/activate + sudo mkdir /mnt/images + sudo chmod 777 /mnt/images + openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-name }}" + openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: install libguestfs + run: | + sudo apt -y update + sudo apt -y install libguestfs-tools + + - name: mkdir for mount + run: sudo mkdir -p './${{ steps.manifest.outputs.image-name }}' + + - name: mount qcow2 file + run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}' + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@0.17.0 + with: + scan-type: fs + scan-ref: "${{ steps.manifest.outputs.image-name }}" + scanners: "vuln" + format: sarif + output: "${{ steps.manifest.outputs.image-name }}.sarif" + # turn off secret scanning to speed things up + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: "${{ steps.manifest.outputs.image-name }}.sarif" + category: "${{ matrix.os_version }}-${{ matrix.build }}" + + - name: Fail if scan has CRITICAL vulnerabilities + uses: aquasecurity/trivy-action@0.16.1 + with: + scan-type: fs + scan-ref: "${{ steps.manifest.outputs.image-name }}" + scanners: "vuln" + format: table + exit-code: '1' + severity: 'CRITICAL' + ignore-unfixed: true + upload: name: upload-nightly-targets + needs: openstack concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.image }}-${{ matrix.target_cloud }} cancel-in-progress: true From 260146accb2c0e476063fb57f7cb98d4a3f8cdf2 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Mon, 30 Sep 2024 15:49:05 +0000 Subject: [PATCH 28/47] fix image delete logic --- .github/workflows/fatimage.yml | 35 +++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 25b298f5a..9b10fa0a5 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -100,24 +100,13 @@ jobs: echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" - - name: Delete old latest image - run: | - . venv/bin/activate - IMAGE_COUNT=$(openstack image list --name ${{ steps.manifest.outputs.image-name }} -f value -c ID | wc -l) - if [ "$IMAGE_COUNT" -gt 1 ]; then - OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ steps.manifest.outputs.image-name }}" -f value -c ID | head -n 1) - openstack image delete "$OLD_IMAGE_ID" - else - echo "Only one image exists, skipping deletion." - fi - - name: Download image run: | . venv/bin/activate sudo mkdir /mnt/images sudo chmod 777 /mnt/images - openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-name }}" - openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} + openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-id }}" + openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-id }} - name: Set up QEMU uses: docker/setup-qemu-action@v3 @@ -161,6 +150,26 @@ jobs: exit-code: '1' severity: 'CRITICAL' ignore-unfixed: true + + - name: Delete new image if Trivy scan fails + if: failure() && steps.packer_build.outcome == 'success' # Runs if the Trivy scan found crit vulnerabilities or failed + run: | + . venv/bin/activate + echo "Deleting new image due to critical vulnerabilities..." + openstack image delete "${{ steps.manifest.outputs.image-id }}" + + - name: Delete old latest image + if: success() # Runs only if Trivy scan passed + run: | + . venv/bin/activate + IMAGE_COUNT=$(openstack image list --name ${{ steps.manifest.outputs.image-name }} -f value -c ID | wc -l) + if [ "$IMAGE_COUNT" -gt 1 ]; then + OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ steps.manifest.outputs.image-name }}" -f value -c ID | head -n 1) + echo "Deleting old image ID: $OLD_IMAGE_ID" + openstack image delete "$OLD_IMAGE_ID" + else + echo "Only one image exists, skipping deletion." + fi upload: name: upload-nightly-targets From fd44029589c0b1ccbb3bfdbd8f1c6ed773f0ede8 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 1 Oct 2024 14:31:07 +0000 Subject: [PATCH 29/47] use azimuth-cloud trivy db mirror --- .github/workflows/fatimage.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 9b10fa0a5..0e6331f90 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -132,7 +132,7 @@ jobs: output: "${{ steps.manifest.outputs.image-name }}.sarif" # turn off secret scanning to speed things up env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + TRIVY_DB_REPOSITORY: ghcr.io/azimuth-cloud/trivy-db:2 - name: Upload Trivy scan results to GitHub Security tab uses: github/codeql-action/upload-sarif@v3 @@ -150,6 +150,8 @@ jobs: exit-code: '1' severity: 'CRITICAL' ignore-unfixed: true + env: + TRIVY_DB_REPOSITORY: ghcr.io/azimuth-cloud/trivy-db:2 - name: Delete new image if Trivy scan fails if: failure() && steps.packer_build.outcome == 'success' # Runs if the Trivy scan found crit vulnerabilities or failed From 716352b91f6b1e1e447cda3b9523f3a2bcb19eb3 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 1 Oct 2024 15:15:24 +0000 Subject: [PATCH 30/47] use GITHUB_TOKEN env --- .github/workflows/fatimage.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 0e6331f90..e35e9e254 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -132,7 +132,7 @@ jobs: output: "${{ steps.manifest.outputs.image-name }}.sarif" # turn off secret scanning to speed things up env: - TRIVY_DB_REPOSITORY: ghcr.io/azimuth-cloud/trivy-db:2 + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Upload Trivy scan results to GitHub Security tab uses: github/codeql-action/upload-sarif@v3 @@ -151,8 +151,8 @@ jobs: severity: 'CRITICAL' ignore-unfixed: true env: - TRIVY_DB_REPOSITORY: ghcr.io/azimuth-cloud/trivy-db:2 - + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Delete new image if Trivy scan fails if: failure() && steps.packer_build.outcome == 'success' # Runs if the Trivy scan found crit vulnerabilities or failed run: | From cfa40d3c10e87276438b7118cbf75ae958633258 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 1 Oct 2024 18:45:45 +0000 Subject: [PATCH 31/47] test new fatimage build --- .github/workflows/fatimage.yml | 130 ++++----------------------------- 1 file changed, 13 insertions(+), 117 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index e35e9e254..0ec339b38 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -1,4 +1,4 @@ -name: Build nightly image +name: Build fat image on: workflow_dispatch: inputs: @@ -10,8 +10,6 @@ on: - LEAFCLOUD - SMS - ARCUS - schedule: - - cron: '0 0 * * *' # Run at midnight jobs: openstack: @@ -27,16 +25,16 @@ jobs: - RL8 - RL9 build: - - openstack.rocky-latest - - openstack.rocky-latest-cuda + - openstack.openhpc + - openstack.openhpc-cuda exclude: - os_version: RL8 - build: openstack.rocky-latest-cuda + build: openstack.openhpc-cuda env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack - CI_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }} + CI_CLOUD: ${{ github.event.inputs.ci_cloud }} steps: - uses: actions/checkout@v2 @@ -79,12 +77,18 @@ jobs: cd packer/ packer init . + if [[ "${{ matrix.build }}" == *"cuda"* ]]; then + SOURCE_IMAGE_NAME="rocky-latest-cuda" + else + SOURCE_IMAGE_NAME="rocky-latest" + fi + PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ + -var "source_image_name={\"${{ matrix.os_version }}\":\"${SOURCE_IMAGE_NAME}-${{ matrix.os_version }}\"}" \ openstack.pkr.hcl - env: PKR_VAR_os_version: ${{ matrix.os_version }} @@ -105,8 +109,7 @@ jobs: . venv/bin/activate sudo mkdir /mnt/images sudo chmod 777 /mnt/images - openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-id }}" - openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-id }} + openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} - name: Set up QEMU uses: docker/setup-qemu-action@v3 @@ -131,8 +134,6 @@ jobs: format: sarif output: "${{ steps.manifest.outputs.image-name }}.sarif" # turn off secret scanning to speed things up - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Upload Trivy scan results to GitHub Security tab uses: github/codeql-action/upload-sarif@v3 @@ -150,108 +151,3 @@ jobs: exit-code: '1' severity: 'CRITICAL' ignore-unfixed: true - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Delete new image if Trivy scan fails - if: failure() && steps.packer_build.outcome == 'success' # Runs if the Trivy scan found crit vulnerabilities or failed - run: | - . venv/bin/activate - echo "Deleting new image due to critical vulnerabilities..." - openstack image delete "${{ steps.manifest.outputs.image-id }}" - - - name: Delete old latest image - if: success() # Runs only if Trivy scan passed - run: | - . venv/bin/activate - IMAGE_COUNT=$(openstack image list --name ${{ steps.manifest.outputs.image-name }} -f value -c ID | wc -l) - if [ "$IMAGE_COUNT" -gt 1 ]; then - OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ steps.manifest.outputs.image-name }}" -f value -c ID | head -n 1) - echo "Deleting old image ID: $OLD_IMAGE_ID" - openstack image delete "$OLD_IMAGE_ID" - else - echo "Only one image exists, skipping deletion." - fi - - upload: - name: upload-nightly-targets - needs: openstack - concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.image }}-${{ matrix.target_cloud }} - cancel-in-progress: true - runs-on: ubuntu-22.04 - strategy: - fail-fast: false - matrix: - target_cloud: - - LEAFCLOUD - - SMS - - ARCUS - os_version: - - RL8 - - RL9 - image: - - rocky-latest - - rocky-latest-cuda - exclude: - - os_version: RL8 - image: rocky-latest-cuda - - target_cloud: LEAFCLOUD - env: - OS_CLOUD: openstack - SOURCE_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }} - TARGET_CLOUD: ${{ matrix.target_cloud }} - IMAGE_NAME: "${{ matrix.image }}-${{ matrix.os_version }}" - steps: - - uses: actions/checkout@v2 - - - name: Record settings for CI cloud - run: | - echo SOURCE_CLOUD: ${{ env.SOURCE_CLOUD }} - echo TARGET_CLOUD: ${{ env.TARGET_CLOUD }} - - - name: Install openstackclient - run: | - python3 -m venv venv - . venv/bin/activate - pip install -U pip - pip install $(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt) - shell: bash - - - name: Write clouds.yaml - run: | - mkdir -p ~/.config/openstack/ - echo "${{ secrets[format('{0}_CLOUDS_YAML', env.SOURCE_CLOUD)] }}" > ~/.config/openstack/source_clouds.yaml - echo "${{ secrets[format('{0}_CLOUDS_YAML', env.TARGET_CLOUD)] }}" > ~/.config/openstack/target_clouds.yaml - shell: bash - - - name: Download source image - run: | - . venv/bin/activate - export OS_CLIENT_CONFIG_FILE=~/.config/openstack/source_clouds.yaml - openstack image save --file ${{ env.IMAGE_NAME }} ${{ env.IMAGE_NAME }} - shell: bash - - - name: Upload to target cloud - run: | - . venv/bin/activate - export OS_CLIENT_CONFIG_FILE=~/.config/openstack/target_clouds.yaml - - openstack image create "${{ env.IMAGE_NAME }}" \ - --file "${{ env.IMAGE_NAME }}" \ - --disk-format qcow2 \ - shell: bash - - - name: Delete old latest image from target cloud - run: | - . venv/bin/activate - export OS_CLIENT_CONFIG_FILE=~/.config/openstack/target_clouds.yaml - - IMAGE_COUNT=$(openstack image list --name ${{ env.IMAGE_NAME }} -f value -c ID | wc -l) - if [ "$IMAGE_COUNT" -gt 1 ]; then - OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ env.IMAGE_NAME }}" -f value -c ID | head -n 1) - openstack image delete "$OLD_IMAGE_ID" - else - echo "Only one image exists, skipping deletion." - fi - shell: bash From 6bffdf7d0e48a87bb49a5764d6c08af4fa7fb125 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 2 Oct 2024 11:02:24 +0000 Subject: [PATCH 32/47] add final nightlybuilds workflow --- .github/workflows/nightlybuild.yml | 257 +++++++++++++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 .github/workflows/nightlybuild.yml diff --git a/.github/workflows/nightlybuild.yml b/.github/workflows/nightlybuild.yml new file mode 100644 index 000000000..e35e9e254 --- /dev/null +++ b/.github/workflows/nightlybuild.yml @@ -0,0 +1,257 @@ +name: Build nightly image +on: + workflow_dispatch: + inputs: + ci_cloud: + description: 'Select the CI_CLOUD' + required: true + type: choice + options: + - LEAFCLOUD + - SMS + - ARCUS + schedule: + - cron: '0 0 * * *' # Run at midnight + +jobs: + openstack: + name: openstack-imagebuild + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build + cancel-in-progress: true + runs-on: ubuntu-22.04 + strategy: + fail-fast: false # allow other matrix jobs to continue even if one fails + matrix: # build RL8+OFED, RL9+OFED, RL9+OFED+CUDA versions + os_version: + - RL8 + - RL9 + build: + - openstack.rocky-latest + - openstack.rocky-latest-cuda + exclude: + - os_version: RL8 + build: openstack.rocky-latest-cuda + + env: + ANSIBLE_FORCE_COLOR: True + OS_CLOUD: openstack + CI_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }} + steps: + - uses: actions/checkout@v2 + + - name: Record settings for CI cloud + run: | + echo CI_CLOUD: ${{ env.CI_CLOUD }} + + - name: Setup ssh + run: | + set -x + mkdir ~/.ssh + echo "${{ secrets[format('{0}_SSH_KEY', env.CI_CLOUD)] }}" > ~/.ssh/id_rsa + chmod 0600 ~/.ssh/id_rsa + shell: bash + + - name: Add bastion's ssh key to known_hosts + run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts + shell: bash + + - name: Install ansible etc + run: dev/setup-env.sh + + - name: Write clouds.yaml + run: | + mkdir -p ~/.config/openstack/ + echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml + shell: bash + + - name: Setup environment + run: | + . venv/bin/activate + . environments/.stackhpc/activate + + - name: Build fat image with packer + id: packer_build + run: | + set -x + . venv/bin/activate + . environments/.stackhpc/activate + cd packer/ + packer init . + + PACKER_LOG=1 packer build \ + -on-error=${{ vars.PACKER_ON_ERROR }} \ + -only=${{ matrix.build }} \ + -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ + openstack.pkr.hcl + + env: + PKR_VAR_os_version: ${{ matrix.os_version }} + + - name: Get created image names from manifest + id: manifest + run: | + . venv/bin/activate + IMAGE_ID=$(jq --raw-output '.builds[-1].artifact_id' packer/packer-manifest.json) + while ! openstack image show -f value -c name $IMAGE_ID; do + sleep 5 + done + IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID) + echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" + echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" + + - name: Download image + run: | + . venv/bin/activate + sudo mkdir /mnt/images + sudo chmod 777 /mnt/images + openstack image unset --property signature_verified "${{ steps.manifest.outputs.image-id }}" + openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-id }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: install libguestfs + run: | + sudo apt -y update + sudo apt -y install libguestfs-tools + + - name: mkdir for mount + run: sudo mkdir -p './${{ steps.manifest.outputs.image-name }}' + + - name: mount qcow2 file + run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}' + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@0.17.0 + with: + scan-type: fs + scan-ref: "${{ steps.manifest.outputs.image-name }}" + scanners: "vuln" + format: sarif + output: "${{ steps.manifest.outputs.image-name }}.sarif" + # turn off secret scanning to speed things up + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: "${{ steps.manifest.outputs.image-name }}.sarif" + category: "${{ matrix.os_version }}-${{ matrix.build }}" + + - name: Fail if scan has CRITICAL vulnerabilities + uses: aquasecurity/trivy-action@0.16.1 + with: + scan-type: fs + scan-ref: "${{ steps.manifest.outputs.image-name }}" + scanners: "vuln" + format: table + exit-code: '1' + severity: 'CRITICAL' + ignore-unfixed: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Delete new image if Trivy scan fails + if: failure() && steps.packer_build.outcome == 'success' # Runs if the Trivy scan found crit vulnerabilities or failed + run: | + . venv/bin/activate + echo "Deleting new image due to critical vulnerabilities..." + openstack image delete "${{ steps.manifest.outputs.image-id }}" + + - name: Delete old latest image + if: success() # Runs only if Trivy scan passed + run: | + . venv/bin/activate + IMAGE_COUNT=$(openstack image list --name ${{ steps.manifest.outputs.image-name }} -f value -c ID | wc -l) + if [ "$IMAGE_COUNT" -gt 1 ]; then + OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ steps.manifest.outputs.image-name }}" -f value -c ID | head -n 1) + echo "Deleting old image ID: $OLD_IMAGE_ID" + openstack image delete "$OLD_IMAGE_ID" + else + echo "Only one image exists, skipping deletion." + fi + + upload: + name: upload-nightly-targets + needs: openstack + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.image }}-${{ matrix.target_cloud }} + cancel-in-progress: true + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + target_cloud: + - LEAFCLOUD + - SMS + - ARCUS + os_version: + - RL8 + - RL9 + image: + - rocky-latest + - rocky-latest-cuda + exclude: + - os_version: RL8 + image: rocky-latest-cuda + - target_cloud: LEAFCLOUD + env: + OS_CLOUD: openstack + SOURCE_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }} + TARGET_CLOUD: ${{ matrix.target_cloud }} + IMAGE_NAME: "${{ matrix.image }}-${{ matrix.os_version }}" + steps: + - uses: actions/checkout@v2 + + - name: Record settings for CI cloud + run: | + echo SOURCE_CLOUD: ${{ env.SOURCE_CLOUD }} + echo TARGET_CLOUD: ${{ env.TARGET_CLOUD }} + + - name: Install openstackclient + run: | + python3 -m venv venv + . venv/bin/activate + pip install -U pip + pip install $(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt) + shell: bash + + - name: Write clouds.yaml + run: | + mkdir -p ~/.config/openstack/ + echo "${{ secrets[format('{0}_CLOUDS_YAML', env.SOURCE_CLOUD)] }}" > ~/.config/openstack/source_clouds.yaml + echo "${{ secrets[format('{0}_CLOUDS_YAML', env.TARGET_CLOUD)] }}" > ~/.config/openstack/target_clouds.yaml + shell: bash + + - name: Download source image + run: | + . venv/bin/activate + export OS_CLIENT_CONFIG_FILE=~/.config/openstack/source_clouds.yaml + openstack image save --file ${{ env.IMAGE_NAME }} ${{ env.IMAGE_NAME }} + shell: bash + + - name: Upload to target cloud + run: | + . venv/bin/activate + export OS_CLIENT_CONFIG_FILE=~/.config/openstack/target_clouds.yaml + + openstack image create "${{ env.IMAGE_NAME }}" \ + --file "${{ env.IMAGE_NAME }}" \ + --disk-format qcow2 \ + shell: bash + + - name: Delete old latest image from target cloud + run: | + . venv/bin/activate + export OS_CLIENT_CONFIG_FILE=~/.config/openstack/target_clouds.yaml + + IMAGE_COUNT=$(openstack image list --name ${{ env.IMAGE_NAME }} -f value -c ID | wc -l) + if [ "$IMAGE_COUNT" -gt 1 ]; then + OLD_IMAGE_ID=$(openstack image list --sort created_at:asc --name "${{ env.IMAGE_NAME }}" -f value -c ID | head -n 1) + openstack image delete "$OLD_IMAGE_ID" + else + echo "Only one image exists, skipping deletion." + fi + shell: bash From 348c1508e4463a3a58618a5f7bc5cc4020c6c21a Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 3 Oct 2024 10:05:16 +0000 Subject: [PATCH 33/47] move trivy scan to separate workflow --- .github/workflows/fatimage.yml | 60 +++++++--------------------------- 1 file changed, 11 insertions(+), 49 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 0ec339b38..96da6cee2 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -30,11 +30,12 @@ jobs: exclude: - os_version: RL8 build: openstack.openhpc-cuda - env: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack CI_CLOUD: ${{ github.event.inputs.ci_cloud }} + BUILD: "${{ matrix.build }}-${{ matrix.os_version }}" + steps: - uses: actions/checkout@v2 @@ -101,53 +102,14 @@ jobs: sleep 5 done IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID) - echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" - echo "image-id=$IMAGE_ID" >> "$GITHUB_OUTPUT" - - - name: Download image - run: | - . venv/bin/activate - sudo mkdir /mnt/images - sudo chmod 777 /mnt/images - openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - - name: install libguestfs - run: | - sudo apt -y update - sudo apt -y install libguestfs-tools - - - name: mkdir for mount - run: sudo mkdir -p './${{ steps.manifest.outputs.image-name }}' - - - name: mount qcow2 file - run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}' - - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@0.17.0 - with: - scan-type: fs - scan-ref: "${{ steps.manifest.outputs.image-name }}" - scanners: "vuln" - format: sarif - output: "${{ steps.manifest.outputs.image-name }}.sarif" - # turn off secret scanning to speed things up - - - name: Upload Trivy scan results to GitHub Security tab - uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: "${{ steps.manifest.outputs.image-name }}.sarif" - category: "${{ matrix.os_version }}-${{ matrix.build }}" + echo $IMAGE_ID > image-id-${{ env.BUILD }}.txt + echo $IMAGE_NAME > image-name-${{ env.BUILD }}.txt - - name: Fail if scan has CRITICAL vulnerabilities - uses: aquasecurity/trivy-action@0.16.1 + - name: Upload manifest artifact + uses: actions/upload-artifact@v4 with: - scan-type: fs - scan-ref: "${{ steps.manifest.outputs.image-name }}" - scanners: "vuln" - format: table - exit-code: '1' - severity: 'CRITICAL' - ignore-unfixed: true + name: image-details + path: | + ./image-id-${{ env.BUILD }}.txt + ./image-name-${{ env.BUILD }}.txt + overwrite: true \ No newline at end of file From 6c37ac811c4e25620120e474e18f08151699f324 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 3 Oct 2024 12:23:06 +0000 Subject: [PATCH 34/47] bump image and test new trivy scan --- .github/workflows/trivyscan.yml | 92 ++++++++++++++++++++++++ environments/.stackhpc/terraform/main.tf | 4 +- 2 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/trivyscan.yml diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml new file mode 100644 index 000000000..d6200718d --- /dev/null +++ b/.github/workflows/trivyscan.yml @@ -0,0 +1,92 @@ +name: Trivy scan image for vulnerabilities +on: + workflow_dispatch: + pull_request: + branches: + - main + push: + branches: + - ci/nightly-builds + +jobs: + scan: + concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build + cancel-in-progress: true + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + os_version: + - RL8 + - RL9 + build: + - openstack.openhpc + - openstack.openhpc-cuda + exclude: + - os_version: RL8 + build: openstack.openhpc-cuda + env: + BUILD: ${{ matrix.build }}-${{ matrix.os_version }} + + steps: + - name: Download image details artifact + uses: actions/download-artifact@v4 + with: + name: image-details + + - name: Use the downloaded artifact + id: manifest + run: | + IMAGE_ID=$(cat image-id-${{ env.BUILD }}.txt) + IMAGE_NAME=$(cat image-name-${{ env.BUILD }}.txt) + echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" + echo "image-id=${IMAGE_ID}" >> "$GITHUB_OUTPUT" + + - name: Download image + run: | + . venv/bin/activate + sudo mkdir /mnt/images + sudo chmod 777 /mnt/images + openstack image save --file /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 ${{ steps.manifest.outputs.image-name }} + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: install libguestfs + run: | + sudo apt -y update + sudo apt -y install libguestfs-tools + + - name: mkdir for mount + run: sudo mkdir -p './${{ steps.manifest.outputs.image-name }}' + + - name: mount qcow2 file + run: sudo guestmount -a /mnt/images/${{ steps.manifest.outputs.image-name }}.qcow2 -i --ro -o allow_other './${{ steps.manifest.outputs.image-name }}' + + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@0.24.0 + with: + scan-type: fs + scan-ref: "${{ steps.manifest.outputs.image-name }}" + scanners: "vuln" + format: sarif + output: "${{ steps.manifest.outputs.image-name }}.sarif" + # turn off secret scanning to speed things up + + - name: Upload Trivy scan results to GitHub Security tab + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: "${{ steps.manifest.outputs.image-name }}.sarif" + category: "${{ matrix.os_version }}-${{ matrix.build }}" + + - name: Fail if scan has CRITICAL vulnerabilities + uses: aquasecurity/trivy-action@0.24.0 + with: + scan-type: fs + scan-ref: "${{ steps.manifest.outputs.image-name }}" + scanners: "vuln" + format: table + exit-code: '1' + severity: 'CRITICAL' + ignore-unfixed: true diff --git a/environments/.stackhpc/terraform/main.tf b/environments/.stackhpc/terraform/main.tf index 0b34a4947..f4668cf74 100644 --- a/environments/.stackhpc/terraform/main.tf +++ b/environments/.stackhpc/terraform/main.tf @@ -30,8 +30,8 @@ variable "cluster_image" { type = map(string) default = { # https://github.com/stackhpc/ansible-slurm-appliance/pull/427 - RL8: "openhpc-ofed-RL8-240906-1042-32568dbb" - RL9: "openhpc-ofed-RL9-240906-1041-32568dbb" + RL8: "openhpc-RL8-241003-1122-348c1508" + RL9: "openhpc-RL9-241003-1122-348c1508" } } From a256bce2b00141f658ef1baf63b325858b301c15 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 3 Oct 2024 12:37:59 +0000 Subject: [PATCH 35/47] fix artifact creation --- .github/workflows/fatimage.yml | 10 +++++----- .github/workflows/trivyscan.yml | 9 +++------ 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 96da6cee2..472e60a15 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -102,14 +102,14 @@ jobs: sleep 5 done IMAGE_NAME=$(openstack image show -f value -c name $IMAGE_ID) - echo $IMAGE_ID > image-id-${{ env.BUILD }}.txt - echo $IMAGE_NAME > image-name-${{ env.BUILD }}.txt + echo $IMAGE_ID > image-id.txt + echo $IMAGE_NAME > image-name.txt - name: Upload manifest artifact uses: actions/upload-artifact@v4 with: - name: image-details + name: image-details-${{ env.BUILD }} path: | - ./image-id-${{ env.BUILD }}.txt - ./image-name-${{ env.BUILD }}.txt + ./image-id.txt + ./image-name.txt overwrite: true \ No newline at end of file diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml index d6200718d..fbab18032 100644 --- a/.github/workflows/trivyscan.yml +++ b/.github/workflows/trivyscan.yml @@ -1,9 +1,6 @@ name: Trivy scan image for vulnerabilities on: workflow_dispatch: - pull_request: - branches: - - main push: branches: - ci/nightly-builds @@ -33,13 +30,13 @@ jobs: - name: Download image details artifact uses: actions/download-artifact@v4 with: - name: image-details + name: image-details-${{ env.BUILD }} - name: Use the downloaded artifact id: manifest run: | - IMAGE_ID=$(cat image-id-${{ env.BUILD }}.txt) - IMAGE_NAME=$(cat image-name-${{ env.BUILD }}.txt) + IMAGE_ID=$(cat image-id.txt) + IMAGE_NAME=$(cat image-name.txt) echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" echo "image-id=${IMAGE_ID}" >> "$GITHUB_OUTPUT" From 3fbc9e994c0f49e756c5eb734914613f9563c14d Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 3 Oct 2024 13:17:27 +0000 Subject: [PATCH 36/47] bump image and test trivy scan --- .github/workflows/trivyscan.yml | 61 +++++++++++++------ .../.stackhpc/terraform/cluster_image.json | 5 ++ environments/.stackhpc/terraform/main.tf | 6 +- 3 files changed, 48 insertions(+), 24 deletions(-) create mode 100644 environments/.stackhpc/terraform/cluster_image.json diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml index fbab18032..80be48542 100644 --- a/.github/workflows/trivyscan.yml +++ b/.github/workflows/trivyscan.yml @@ -8,37 +8,56 @@ on: jobs: scan: concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.os_version }}-${{ matrix.build }} # to branch/PR + OS + build + group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.build }} # to branch/PR + OS + build cancel-in-progress: true runs-on: ubuntu-latest strategy: fail-fast: false matrix: - os_version: - - RL8 - - RL9 - build: - - openstack.openhpc - - openstack.openhpc-cuda - exclude: - - os_version: RL8 - build: openstack.openhpc-cuda + build: ["RL8", "RL9", "RL9-cuda"] env: - BUILD: ${{ matrix.build }}-${{ matrix.os_version }} + JSON_PATH: environments/.stackhpc/terraform/cluster_image.json + OS_CLOUD: openstack + CI_CLOUD: ${{ vars.CI_CLOUD }} steps: - - name: Download image details artifact - uses: actions/download-artifact@v4 - with: - name: image-details-${{ env.BUILD }} + - uses: actions/checkout@v2 + + - name: Record settings for CI cloud + run: | + echo CI_CLOUD: ${{ env.CI_CLOUD }} + + - name: Setup ssh + run: | + set -x + mkdir ~/.ssh + echo "${{ secrets[format('{0}_SSH_KEY', env.CI_CLOUD)] }}" > ~/.ssh/id_rsa + chmod 0600 ~/.ssh/id_rsa + shell: bash + + - name: Add bastion's ssh key to known_hosts + run: cat environments/.stackhpc/bastion_fingerprints >> ~/.ssh/known_hosts + shell: bash + + - name: setup environment + run: | + python3 -m venv venv + . venv/bin/activate + pip install -U pip + pip install $(grep -o 'python-openstackclient[><=0-9\.]*' requirements.txt) + shell: bash + + - name: Write clouds.yaml + run: | + mkdir -p ~/.config/openstack/ + echo "${{ secrets[format('{0}_CLOUDS_YAML', env.CI_CLOUD)] }}" > ~/.config/openstack/clouds.yaml + shell: bash - - name: Use the downloaded artifact + - name: Parse image name json id: manifest run: | - IMAGE_ID=$(cat image-id.txt) - IMAGE_NAME=$(cat image-name.txt) + IMAGE_NAME=$(jq --arg version "${{ matrix.build }}" -r '.[$version]' "${{ env.JSON_PATH }}") echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" - echo "image-id=${IMAGE_ID}" >> "$GITHUB_OUTPUT" - name: Download image run: | @@ -70,6 +89,8 @@ jobs: format: sarif output: "${{ steps.manifest.outputs.image-name }}.sarif" # turn off secret scanning to speed things up + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Upload Trivy scan results to GitHub Security tab uses: github/codeql-action/upload-sarif@v3 @@ -87,3 +108,5 @@ jobs: exit-code: '1' severity: 'CRITICAL' ignore-unfixed: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/environments/.stackhpc/terraform/cluster_image.json b/environments/.stackhpc/terraform/cluster_image.json new file mode 100644 index 000000000..72eb672bf --- /dev/null +++ b/environments/.stackhpc/terraform/cluster_image.json @@ -0,0 +1,5 @@ +{ + "RL8": "openhpc-RL8-241003-1241-a256bce2", + "RL9": "openhpc-RL9-241003-1241-a256bce2", + "RL9-cuda": "openhpc-cuda-RL9-241003-1242-a256bce2" +} \ No newline at end of file diff --git a/environments/.stackhpc/terraform/main.tf b/environments/.stackhpc/terraform/main.tf index f4668cf74..1ac1556f4 100644 --- a/environments/.stackhpc/terraform/main.tf +++ b/environments/.stackhpc/terraform/main.tf @@ -28,11 +28,7 @@ variable "os_version" { variable "cluster_image" { description = "single image for all cluster nodes, keyed by os_version - a convenience for CI" type = map(string) - default = { - # https://github.com/stackhpc/ansible-slurm-appliance/pull/427 - RL8: "openhpc-RL8-241003-1122-348c1508" - RL9: "openhpc-RL9-241003-1122-348c1508" - } + default = jsondecode(file("./cluster_image.json")) } variable "cluster_net" {} From a60dbf445aba778f362ed44570b77de9823dd548 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Thu, 3 Oct 2024 14:52:40 +0000 Subject: [PATCH 37/47] only run trivy scan on image bumps --- .github/workflows/trivyscan.yml | 2 ++ .../.stackhpc/terraform/cluster_image.auto.tfvars.json | 7 +++++++ environments/.stackhpc/terraform/cluster_image.json | 5 ----- environments/.stackhpc/terraform/main.tf | 1 - 4 files changed, 9 insertions(+), 6 deletions(-) create mode 100644 environments/.stackhpc/terraform/cluster_image.auto.tfvars.json delete mode 100644 environments/.stackhpc/terraform/cluster_image.json diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml index 80be48542..b20bb504d 100644 --- a/.github/workflows/trivyscan.yml +++ b/.github/workflows/trivyscan.yml @@ -4,6 +4,8 @@ on: push: branches: - ci/nightly-builds + paths: + - 'environments/.stackhpc/terraform/cluster_image.json' jobs: scan: diff --git a/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json new file mode 100644 index 000000000..c2a1adef3 --- /dev/null +++ b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json @@ -0,0 +1,7 @@ +{ + "cluster_image": { + "RL8": "openhpc-RL8-241003-1241-a256bce2", + "RL9": "openhpc-RL9-241003-1241-a256bce2", + "RL9-cuda": "openhpc-cuda-RL9-241003-1242-a256bce2" + } +} \ No newline at end of file diff --git a/environments/.stackhpc/terraform/cluster_image.json b/environments/.stackhpc/terraform/cluster_image.json deleted file mode 100644 index 72eb672bf..000000000 --- a/environments/.stackhpc/terraform/cluster_image.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "RL8": "openhpc-RL8-241003-1241-a256bce2", - "RL9": "openhpc-RL9-241003-1241-a256bce2", - "RL9-cuda": "openhpc-cuda-RL9-241003-1242-a256bce2" -} \ No newline at end of file diff --git a/environments/.stackhpc/terraform/main.tf b/environments/.stackhpc/terraform/main.tf index 1ac1556f4..ad13ae45d 100644 --- a/environments/.stackhpc/terraform/main.tf +++ b/environments/.stackhpc/terraform/main.tf @@ -28,7 +28,6 @@ variable "os_version" { variable "cluster_image" { description = "single image for all cluster nodes, keyed by os_version - a convenience for CI" type = map(string) - default = jsondecode(file("./cluster_image.json")) } variable "cluster_net" {} From d883e55ce66960d2737458e64e11776e55093902 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 4 Oct 2024 09:02:38 +0000 Subject: [PATCH 38/47] bump image to test trivy scan run condition --- .github/workflows/trivyscan.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml index b20bb504d..51553a5ea 100644 --- a/.github/workflows/trivyscan.yml +++ b/.github/workflows/trivyscan.yml @@ -5,7 +5,7 @@ on: branches: - ci/nightly-builds paths: - - 'environments/.stackhpc/terraform/cluster_image.json' + - 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json' jobs: scan: From 00eaf4e01d0947c07e0c9aef6fcfb154a3262790 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 4 Oct 2024 09:09:54 +0000 Subject: [PATCH 39/47] bump cuda image --- .github/workflows/trivyscan.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml index 51553a5ea..aeed08f67 100644 --- a/.github/workflows/trivyscan.yml +++ b/.github/workflows/trivyscan.yml @@ -5,7 +5,7 @@ on: branches: - ci/nightly-builds paths: - - 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json' + - 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json/cluster_image' jobs: scan: @@ -18,7 +18,7 @@ jobs: matrix: build: ["RL8", "RL9", "RL9-cuda"] env: - JSON_PATH: environments/.stackhpc/terraform/cluster_image.json + JSON_PATH: environments/.stackhpc/terraform/cluster_image.auto.tfvars.json/cluster_image OS_CLOUD: openstack CI_CLOUD: ${{ vars.CI_CLOUD }} From 8630ebbe894542a7ba5f7879057f35a1fdfdc102 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 4 Oct 2024 09:20:47 +0000 Subject: [PATCH 40/47] bump image --- .github/workflows/trivyscan.yml | 6 +++--- .../.stackhpc/terraform/cluster_image.auto.tfvars.json | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml index aeed08f67..5afd76b83 100644 --- a/.github/workflows/trivyscan.yml +++ b/.github/workflows/trivyscan.yml @@ -5,7 +5,7 @@ on: branches: - ci/nightly-builds paths: - - 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json/cluster_image' + - 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json' jobs: scan: @@ -18,7 +18,7 @@ jobs: matrix: build: ["RL8", "RL9", "RL9-cuda"] env: - JSON_PATH: environments/.stackhpc/terraform/cluster_image.auto.tfvars.json/cluster_image + JSON_PATH: environments/.stackhpc/terraform/cluster_image.auto.tfvars.json OS_CLOUD: openstack CI_CLOUD: ${{ vars.CI_CLOUD }} @@ -58,7 +58,7 @@ jobs: - name: Parse image name json id: manifest run: | - IMAGE_NAME=$(jq --arg version "${{ matrix.build }}" -r '.[$version]' "${{ env.JSON_PATH }}") + IMAGE_NAME=$(jq --arg version "${{ matrix.build }}" -r '.cluster_image[$version]' "${{ env.JSON_PATH }}") echo "image-name=${IMAGE_NAME}" >> "$GITHUB_OUTPUT" - name: Download image diff --git a/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json index c2a1adef3..984aa0d42 100644 --- a/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json @@ -2,6 +2,6 @@ "cluster_image": { "RL8": "openhpc-RL8-241003-1241-a256bce2", "RL9": "openhpc-RL9-241003-1241-a256bce2", - "RL9-cuda": "openhpc-cuda-RL9-241003-1242-a256bce2" + "RL9-cuda": "openhpc-cuda-RL9-241003-1122-348c1508" } } \ No newline at end of file From 46c1c15123de7d46c1728ce8c26719cb3d866a50 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 4 Oct 2024 09:57:53 +0000 Subject: [PATCH 41/47] extend timeout for trivy scanning cuda image --- .github/workflows/trivyscan.yml | 2 ++ environments/.stackhpc/terraform/cluster_image.auto.tfvars.json | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml index 5afd76b83..7bb5c8d71 100644 --- a/.github/workflows/trivyscan.yml +++ b/.github/workflows/trivyscan.yml @@ -91,6 +91,7 @@ jobs: format: sarif output: "${{ steps.manifest.outputs.image-name }}.sarif" # turn off secret scanning to speed things up + timeout: 15m env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -110,5 +111,6 @@ jobs: exit-code: '1' severity: 'CRITICAL' ignore-unfixed: true + timeout: 15m env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json index 984aa0d42..c2a1adef3 100644 --- a/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json @@ -2,6 +2,6 @@ "cluster_image": { "RL8": "openhpc-RL8-241003-1241-a256bce2", "RL9": "openhpc-RL9-241003-1241-a256bce2", - "RL9-cuda": "openhpc-cuda-RL9-241003-1122-348c1508" + "RL9-cuda": "openhpc-cuda-RL9-241003-1242-a256bce2" } } \ No newline at end of file From 22dcfbb1867041d47b86b432276f8b598f421328 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Fri, 4 Oct 2024 12:42:14 +0000 Subject: [PATCH 42/47] Run workflow on PR to main --- .github/workflows/trivyscan.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml index 7bb5c8d71..82b123e92 100644 --- a/.github/workflows/trivyscan.yml +++ b/.github/workflows/trivyscan.yml @@ -1,9 +1,10 @@ name: Trivy scan image for vulnerabilities on: workflow_dispatch: - push: + pull_request: branches: - - ci/nightly-builds + - main + - master paths: - 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json' From c9ebf42d7c14e815f9fd8b37e70d878b5a33941e Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Tue, 8 Oct 2024 13:39:39 +0000 Subject: [PATCH 43/47] address PR comments --- .github/workflows/fatimage.yml | 3 +-- .github/workflows/nightlybuild.yml | 4 ++-- .github/workflows/trivyscan.yml | 1 - ansible/bootstrap.yml | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 472e60a15..c4779f955 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -34,7 +34,6 @@ jobs: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack CI_CLOUD: ${{ github.event.inputs.ci_cloud }} - BUILD: "${{ matrix.build }}-${{ matrix.os_version }}" steps: - uses: actions/checkout@v2 @@ -108,7 +107,7 @@ jobs: - name: Upload manifest artifact uses: actions/upload-artifact@v4 with: - name: image-details-${{ env.BUILD }} + name: image-details-"${{ matrix.build }}-${{ matrix.os_version }}" path: | ./image-id.txt ./image-name.txt diff --git a/.github/workflows/nightlybuild.yml b/.github/workflows/nightlybuild.yml index e35e9e254..a90847dbe 100644 --- a/.github/workflows/nightlybuild.yml +++ b/.github/workflows/nightlybuild.yml @@ -22,7 +22,7 @@ jobs: runs-on: ubuntu-22.04 strategy: fail-fast: false # allow other matrix jobs to continue even if one fails - matrix: # build RL8+OFED, RL9+OFED, RL9+OFED+CUDA versions + matrix: # build RL8, RL9, RL9+CUDA versions os_version: - RL8 - RL9 @@ -157,7 +157,7 @@ jobs: if: failure() && steps.packer_build.outcome == 'success' # Runs if the Trivy scan found crit vulnerabilities or failed run: | . venv/bin/activate - echo "Deleting new image due to critical vulnerabilities..." + echo "Deleting new image due to critical vulnerabilities or scan failure ..." openstack image delete "${{ steps.manifest.outputs.image-id }}" - name: Delete old latest image diff --git a/.github/workflows/trivyscan.yml b/.github/workflows/trivyscan.yml index 82b123e92..2957b22ee 100644 --- a/.github/workflows/trivyscan.yml +++ b/.github/workflows/trivyscan.yml @@ -4,7 +4,6 @@ on: pull_request: branches: - main - - master paths: - 'environments/.stackhpc/terraform/cluster_image.auto.tfvars.json' diff --git a/ansible/bootstrap.yml b/ansible/bootstrap.yml index 9a7b991dc..18d159996 100644 --- a/ansible/bootstrap.yml +++ b/ansible/bootstrap.yml @@ -148,7 +148,7 @@ tags: cockpit tasks: - name: Remove RHEL cockpit - command: dnf -y remove cockpit-ws + command: dnf -y remove cockpit-ws # N.B. using ansible dnf module is very slow register: dnf_remove_output ignore_errors: true # Avoid failing if a lock or other error happens From 527f7bc1f4c70012dd861bc75acf92e5b4787d91 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 9 Oct 2024 09:50:33 +0000 Subject: [PATCH 44/47] fix source_image_name packer parse --- .github/workflows/fatimage.yml | 21 ++++++++++++++------- .github/workflows/nightlybuild.yml | 9 +++++++++ packer/openstack.pkr.hcl | 10 +++------- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index c4779f955..065ce457c 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -34,6 +34,17 @@ jobs: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack CI_CLOUD: ${{ github.event.inputs.ci_cloud }} + SOURCE_IMAGES_MAP: | + { + "RL8": { + "openstack.openhpc": "rocky-latest-RL8", + "openstack.openhpc-cuda": "rocky-latest-cuda-RL8" + }, + "RL9": { + "openstack.openhpc": "rocky-latest-RL9", + "openstack.openhpc-cuda": "rocky-latest-cuda-RL9" + } + } steps: - uses: actions/checkout@v2 @@ -77,17 +88,13 @@ jobs: cd packer/ packer init . - if [[ "${{ matrix.build }}" == *"cuda"* ]]; then - SOURCE_IMAGE_NAME="rocky-latest-cuda" - else - SOURCE_IMAGE_NAME="rocky-latest" - fi + SOURCE_IMAGE=${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version][matrix.build] }} PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ - -var "source_image_name={\"${{ matrix.os_version }}\":\"${SOURCE_IMAGE_NAME}-${{ matrix.os_version }}\"}" \ + -var "source_image_name=${SOURCE_IMAGE}" \ openstack.pkr.hcl env: PKR_VAR_os_version: ${{ matrix.os_version }} @@ -107,7 +114,7 @@ jobs: - name: Upload manifest artifact uses: actions/upload-artifact@v4 with: - name: image-details-"${{ matrix.build }}-${{ matrix.os_version }}" + name: image-details-${{ matrix.build }}-${{ matrix.os_version }} path: | ./image-id.txt ./image-name.txt diff --git a/.github/workflows/nightlybuild.yml b/.github/workflows/nightlybuild.yml index a90847dbe..bb6f13dac 100644 --- a/.github/workflows/nightlybuild.yml +++ b/.github/workflows/nightlybuild.yml @@ -37,6 +37,12 @@ jobs: ANSIBLE_FORCE_COLOR: True OS_CLOUD: openstack CI_CLOUD: ${{ github.event.inputs.ci_cloud || vars.CI_CLOUD }} + SOURCE_IMAGES_MAP: | + { + "RL8": "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2", + "RL9": "Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2" + } + steps: - uses: actions/checkout@v2 @@ -79,10 +85,13 @@ jobs: cd packer/ packer init . + SOURCE_IMAGE=${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version] }} + PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ + -var "source_image_name=${SOURCE_IMAGE}" openstack.pkr.hcl env: diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index 3f41ff041..733899d72 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -47,12 +47,8 @@ variable "os_version" { # Must supply either source_image_name or source_image_id variable "source_image_name" { - type = map(string) - description = "name of source image, keyed from var.os_version" - default = { - RL8: "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2" - RL9: "Rocky-9-GenericCloud-Base-9.4-20240523.0.x86_64.qcow2" - } + type = string + description = "name of source image" } variable "source_image" { @@ -174,7 +170,7 @@ source "openstack" "openhpc" { # Input image: source_image = "${var.source_image[var.os_version]}" - source_image_name = "${var.source_image_name[var.os_version]}" # NB: must already exist in OpenStack + source_image_name = "${var.source_image_name}" # NB: must already exist in OpenStack # SSH: ssh_username = var.ssh_username From cf417e57385f9e0c97c5e8eaf59d7d888172628d Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 9 Oct 2024 12:09:56 +0000 Subject: [PATCH 45/47] bump image --- .../.stackhpc/terraform/cluster_image.auto.tfvars.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json index c2a1adef3..6796dc315 100644 --- a/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json @@ -1,7 +1,7 @@ { "cluster_image": { - "RL8": "openhpc-RL8-241003-1241-a256bce2", - "RL9": "openhpc-RL9-241003-1241-a256bce2", - "RL9-cuda": "openhpc-cuda-RL9-241003-1242-a256bce2" + "RL8": "openhpc-RL8-241009-1103-527f7bc1", + "RL9": "openhpc-RL9-241009-1103-527f7bc1", + "RL9-cuda": "openhpc-cuda-RL9-241009-1103-527f7bc1" } } \ No newline at end of file From 354b048a6de8e97e5e3952643d8478ff408582b2 Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 9 Oct 2024 15:18:03 +0000 Subject: [PATCH 46/47] additional PR comments --- .github/workflows/fatimage.yml | 5 ++--- .github/workflows/nightlybuild.yml | 5 ++--- packer/openstack.pkr.hcl | 11 ++++------- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/.github/workflows/fatimage.yml b/.github/workflows/fatimage.yml index 065ce457c..5425eb4e3 100644 --- a/.github/workflows/fatimage.yml +++ b/.github/workflows/fatimage.yml @@ -88,16 +88,15 @@ jobs: cd packer/ packer init . - SOURCE_IMAGE=${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version][matrix.build] }} - PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ - -var "source_image_name=${SOURCE_IMAGE}" \ + -var "source_image_name=${{ env.SOURCE_IMAGE }}" \ openstack.pkr.hcl env: PKR_VAR_os_version: ${{ matrix.os_version }} + SOURCE_IMAGE: ${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version][matrix.build] }} - name: Get created image names from manifest id: manifest diff --git a/.github/workflows/nightlybuild.yml b/.github/workflows/nightlybuild.yml index bb6f13dac..4df3f9955 100644 --- a/.github/workflows/nightlybuild.yml +++ b/.github/workflows/nightlybuild.yml @@ -85,17 +85,16 @@ jobs: cd packer/ packer init . - SOURCE_IMAGE=${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version] }} - PACKER_LOG=1 packer build \ -on-error=${{ vars.PACKER_ON_ERROR }} \ -only=${{ matrix.build }} \ -var-file=$PKR_VAR_environment_root/${{ env.CI_CLOUD }}.pkrvars.hcl \ - -var "source_image_name=${SOURCE_IMAGE}" + -var "source_image_name=${{ env.SOURCE_IMAGE }}" openstack.pkr.hcl env: PKR_VAR_os_version: ${{ matrix.os_version }} + SOURCE_IMAGE: ${{ fromJSON(env.SOURCE_IMAGES_MAP)[matrix.os_version] }} - name: Get created image names from manifest id: manifest diff --git a/packer/openstack.pkr.hcl b/packer/openstack.pkr.hcl index 733899d72..ae5744ff3 100644 --- a/packer/openstack.pkr.hcl +++ b/packer/openstack.pkr.hcl @@ -52,12 +52,9 @@ variable "source_image_name" { } variable "source_image" { - type = map(string) - default = { - RL8: null - RL9: null - } - description = "UUID of source image, keyed from var.os_version" + type = string + default = null + description = "UUID of source image" } variable "flavor" { @@ -169,7 +166,7 @@ source "openstack" "openhpc" { security_groups = var.security_groups # Input image: - source_image = "${var.source_image[var.os_version]}" + source_image = "${var.source_image}" source_image_name = "${var.source_image_name}" # NB: must already exist in OpenStack # SSH: From 169b42d99fcb197da8ecb6a57bdb62e1f4071cbb Mon Sep 17 00:00:00 2001 From: bertiethorpe Date: Wed, 9 Oct 2024 15:57:19 +0000 Subject: [PATCH 47/47] bump image --- .../.stackhpc/terraform/cluster_image.auto.tfvars.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json index 6796dc315..f62c8886e 100644 --- a/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json +++ b/environments/.stackhpc/terraform/cluster_image.auto.tfvars.json @@ -1,7 +1,7 @@ { "cluster_image": { - "RL8": "openhpc-RL8-241009-1103-527f7bc1", - "RL9": "openhpc-RL9-241009-1103-527f7bc1", - "RL9-cuda": "openhpc-cuda-RL9-241009-1103-527f7bc1" + "RL8": "openhpc-RL8-241009-1523-354b048a", + "RL9": "openhpc-RL9-241009-1523-354b048a", + "RL9-cuda": "openhpc-cuda-RL9-241009-1523-354b048a" } } \ No newline at end of file