Skip to content

[POP-2916] Change intra_batch_rule to AND #5175

[POP-2916] Change intra_batch_rule to AND

[POP-2916] Change intra_batch_rule to AND #5175

Workflow file for this run

# Runs the GPU-dependent test suite of the `iris-mpc-gpu` crate on pull
# requests. All expensive steps are skipped unless a build-relevant file
# (see the `src` filter below) changed in the pull request.
name: Rust GPU Tests

on:
  pull_request:
    branches-ignore:
      - dev

# One run per workflow + PR head label (falling back to head ref / ref);
# a newer run in the same group cancels the one still in progress.
concurrency:
  group: "${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}"
  cancel-in-progress: true

jobs:
  gpu-e2e:
    runs-on: arc-gpu-amd64-runner
    timeout-minutes: 25
    steps:
      - name: Checkout code
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8

      # Exposes `src_any_changed`, which gates every step below.
      - name: Get all test, doc and src files that have changed
        id: changed-files-yaml
        uses: tj-actions/changed-files@24d32ffd492484c1d75e0c0b894501ddb9d30d62
        with:
          files_yaml: |
            src:
              - Dockerfile*
              - Cargo.lock
              - Cargo.toml
              - deny.toml
              - rust-toolchain.toml
              - iris-*/**
              - migrations/**
              - scripts/**
              - .github/workflows/test-gpu.yaml

      # The following steps will only run if any of the src files have changed
      - name: Validate presence of GPU devices
        if: steps.changed-files-yaml.outputs.src_any_changed == 'true'
        run: nvidia-smi

      - name: Check shared memory size
        if: steps.changed-files-yaml.outputs.src_any_changed == 'true'
        run: df -h

      - name: Update gcc to version 11
        if: steps.changed-files-yaml.outputs.src_any_changed == 'true'
        # `-y` keeps add-apt-repository from waiting for confirmation in CI.
        run: |
          sudo apt-get update
          sudo apt-get install --reinstall ca-certificates
          sudo apt-get install -y build-essential manpages-dev software-properties-common
          sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
          sudo apt-get update
          sudo apt-get install -y gcc-11 g++-11
          sudo ln -sf /usr/bin/gcc-11 /usr/bin/gcc
          gcc --version

      - name: Install OpenSSL && pkg-config && protobuf-compiler
        if: steps.changed-files-yaml.outputs.src_any_changed == 'true'
        run: sudo apt-get update && sudo apt-get install -y pkg-config libssl-dev protobuf-compiler

      # The previous condition also tested
      # `steps.cache-cuda-nccl.outputs.cache-hit != 'true'`, but this job has
      # no step with id `cache-cuda-nccl`, so that clause was always true.
      # It is dropped here; the effective condition is unchanged.
      - name: Install CUDA and NCCL dependencies
        if: steps.changed-files-yaml.outputs.src_any_changed == 'true'
        env:
          DEBIAN_FRONTEND: noninteractive
        # apt-get is used instead of apt, whose CLI is not stable for scripts.
        run: |
          sudo apt-get install -y wget
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update
          sudo apt-get install -y cuda-toolkit-12-2 cuda-command-line-tools-12-2 libnccl2 libnccl-dev

      # Diagnostic only: lists the NVRTC / CUDA driver libraries found under
      # /usr. An identical second copy of this step used to follow the cache
      # step; it was removed because the cache only restores ~/.cargo and
      # target, so the output could not differ.
      - name: Find libs
        if: steps.changed-files-yaml.outputs.src_any_changed == 'true'
        run: find /usr -name "libnvrtc*" && find /usr -name libcuda.so

      # Keyed on the lockfile hash; falls back to the newest cache for this OS.
      - name: Cache Rust build
        if: steps.changed-files-yaml.outputs.src_any_changed == 'true'
        uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809
        id: cache-rust
        with:
          path: |
            ~/.cargo/registry
            ~/.cargo/git
            target
          key: rust-build-${{ runner.os }}-${{ hashFiles('**/Cargo.lock') }}
          restore-keys: |
            rust-build-${{ runner.os }}-

      # NOTE(review): every other action in this workflow is pinned to a commit
      # SHA, while this one tracks the mutable `master` ref - consider pinning.
      - name: Install Rust
        if: steps.changed-files-yaml.outputs.src_any_changed == 'true'
        uses: dtolnay/rust-toolchain@master
        with:
          toolchain: "1.85.0"

      # Tests run single-threaded (`--test-threads=1`).
      - name: GPU Dependent Tests
        if: steps.changed-files-yaml.outputs.src_any_changed == 'true'
        timeout-minutes: 20
        run: cargo test -p iris-mpc-gpu --release --features gpu_dependent -- --test-threads=1
        shell: bash
        # NCCL transport settings; values are quoted because environment
        # variables are strings. NOTE(review): per NVIDIA's NCCL docs these
        # appear to disable the P2P and shared-memory transports and select
        # the socket network backend - confirm against the runner setup.
        env:
          NCCL_P2P_LEVEL: LOC
          NCCL_NET: Socket
          NCCL_P2P_DIRECT_DISABLE: "1"
          NCCL_SHM_DISABLE: "1"