# FBGEMM_GPU PIP Installation Test #845

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# This workflow is used for testing the download and installation of FBGEMM_GPU
# nightly releases published to PyTorch PyPI.
name: FBGEMM_GPU PIP Installation Test

on:
  # Cron Trigger (UTC)
  #
  # Based on the nightly releases schedule in PyTorch infrastructure, the
  # wheels are published to PyTorch PIP at around 11:30 UTC every day.  After
  # publication, it can take up to 30 minutes for the wheels to become
  # available, as the re-indexing job is scheduled to run every 30 minutes.
  # As such, we set the PIP install + test workflow to be kicked off 4 hours
  # after the publish job is kicked off (i.e. at 15:30 UTC) to give ample time
  # for the nightly wheel to be available in PyTorch PIP.
  #
  schedule:
    - cron: '30 15 * * *'

  # Manual Trigger
  #
  # The inputs below are only populated for manually-dispatched runs; the jobs
  # in this workflow fall back to their own defaults when an input is empty.
  #
  workflow_dispatch:
    inputs:
      pytorch-channel-version:
        description: PyTorch Version (e.g. '2.8.0', 'nightly', 'test')
        type: string
        required: true
        default: "nightly"
      fbgemm-channel-version:
        description: FBGEMM-GPU Channel + Version (e.g. '1.3.0', 'nightly', 'test/1.3.0')
        type: string
        required: true
        default: "nightly"
      fbgemm-build-target:
        description: FBGEMM-GPU Build Target
        type: choice
        required: true
        options: [ "default", "genai" ]
        # The default of a `choice` input has to be one of its `options`;
        # "cpu" is a build *variant* (see below), not a build target.
        default: "default"
      fbgemm-build-variant:
        description: FBGEMM-GPU Variant
        type: choice
        required: true
        options: [ "cpu", "cuda", "rocm" ]
        default: "cpu"

jobs:
  # Calls the reusable matrix-generation workflow for the CUDA variant.  Runs
  # only when the repository owner is `pytorch`, and only for scheduled runs or
  # manual runs that selected the `cuda` variant.
  generate-test-matrix-cuda:
    if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm-build-variant == 'cuda')) }}
    uses: ./.github/workflows/_fbgemm_gpu_generate_ci_matrix.yml
    with:
      repo-owner: ${{ github.repository_owner }}
      # Scheduled runs use the `nightly` ref; all other runs use the
      # triggering ref
      repo-ref: ${{ (github.event_name == 'schedule' && 'nightly') || github.ref }}
      # Falls back to both targets when no dispatch input is present
      targets: ${{ github.event.inputs.fbgemm-build-target || 'default,genai' }}
      variant: cuda
      jobtype: install

  # Runs the reusable CUDA test workflow over the matrix produced by
  # `generate-test-matrix-cuda`, under the same trigger condition.
  test-pypi-install-cuda:
    if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm-build-variant == 'cuda')) }}
    needs: generate-test-matrix-cuda
    uses: ./.github/workflows/_fbgemm_gpu_cuda_test.yml
    with:
      matrix: ${{ needs.generate-test-matrix-cuda.outputs.matrix }}
      repo-ref: ${{ (github.event_name == 'schedule' && 'nightly') || github.ref }}
      # Both versions fall back to `nightly` when no dispatch input is present
      pytorch-channel-version: ${{ github.event.inputs.pytorch-channel-version || 'nightly' }}
      fbgemm-channel-version: ${{ github.event.inputs.fbgemm-channel-version || 'nightly' }}
      # JSON object of extra environment variables handed to the reusable
      # workflow
      extra-env: >-
        {
          "ENFORCE_CUDA_DEVICE": 1
        }

  # Installs the CPU variant of PyTorch and FBGEMM_GPU from PIP inside an
  # Amazon Linux 2023 container and runs the test suite, once per
  # (host machine, Python version) combination.
  test_pypi_install_cpu:
    if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm-build-variant == 'cpu')) }}
    runs-on: ${{ matrix.host-machine.instance }}
    container:
      image: amazonlinux:2023
      options: --user root
    defaults:
      run:
        shell: bash
    env:
      # Script sourced at the start of every step to load the helper functions
      PRELUDE: .github/scripts/setup_env.bash
      BUILD_ENV: test_install
      BUILD_VARIANT: cpu
    strategy:
      # Let the remaining matrix combinations finish when one of them fails
      fail-fast: false
      matrix:
        # `timeout` is the per-host limit (in minutes) for the test step
        host-machine: [
          { arch: x86, instance: "linux.4xlarge", timeout: 20 },
          { arch: arm, instance: "linux.arm64.m7g.4xlarge", timeout: 30 },
        ]
        python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]

    steps:
      - name: Setup Build Container
        run: yum update -y; yum install -y binutils findutils git pciutils sudo wget which

      - name: Checkout the Repository
        uses: actions/checkout@v4

      - name: Display System Info
        run: . $PRELUDE; print_system_info; print_ec2_info

      - name: Display GPU Info
        run: . $PRELUDE; print_gpu_info

      - name: Setup Miniconda
        run: . $PRELUDE; setup_miniconda $HOME/miniconda

      - name: Create Conda Environment
        run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

      # This job's matrix defines no `compiler` key, so the former
      # `${{ matrix.compiler }}` argument always expanded to an empty string.
      # The helper is therefore called without a compiler argument, which is
      # the same command the step has been running.
      - name: Install C/C++ Compilers for Updated LIBGCC
        run: . $PRELUDE; install_cxx_compiler $BUILD_ENV

      - name: Install Build Tools
        run: . $PRELUDE; install_build_tools $BUILD_ENV

      - name: Install PyTorch-CPU
        run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ github.event.inputs.pytorch-channel-version || 'nightly' }} cpu

      # Runs after both success and failure of the earlier steps, but is
      # guarded so that it is a no-op when conda is not available
      - name: Collect PyTorch Environment Info
        if: ${{ success() || failure() }}
        run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi

      - name: Install FBGEMM_GPU-CPU
        run: . $PRELUDE; install_fbgemm_gpu_pip $BUILD_ENV ${{ github.event.inputs.fbgemm-channel-version || 'nightly' }} cpu

      - name: Test with PyTest
        timeout-minutes: ${{ matrix.host-machine.timeout }}
        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV

  # Installs the ROCm variant of PyTorch and FBGEMM_GPU from PIP inside a
  # ROCm Ubuntu 22.04 container and runs the test suite, once per
  # (host machine, Python version, ROCm version) combination.
  test_pypi_install_rocm:
    if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm-build-variant == 'rocm')) }}
    runs-on: ${{ matrix.host-machine.instance }}
    container:
      image: "rocm/dev-ubuntu-22.04:${{ matrix.rocm-version }}-complete"
      # Passes the /dev/kfd and /dev/dri devices through to the container
      options: --user root --device=/dev/kfd --device=/dev/dri --ipc=host --shm-size 16G --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined
    defaults:
      run:
        shell: bash
    env:
      # Script sourced at the start of every step to load the helper functions
      PRELUDE: .github/scripts/setup_env.bash
      BUILD_ENV: test_install
      BUILD_VARIANT: rocm
      ENFORCE_ROCM_DEVICE: 1
    strategy:
      # Let the remaining matrix combinations finish when one of them fails
      fail-fast: false
      matrix:
        host-machine: [
          { arch: x86, instance: "linux.rocm.gpu.2" },
        ]
        # NOTE(review): ROCm machines are limited, and this list was described
        # as a subset of Python versions, but it currently matches the CPU
        # job's full list.  Trim it here if runner capacity requires a subset.
        python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
        rocm-version: [ "6.3", "6.4" ]

    steps:
      - name: Setup Build Container
        run: |
          apt update -y
          apt install -y git wget
          git config --global --add safe.directory '*'

      - name: Checkout the Repository
        uses: actions/checkout@v4

      - name: Display System Info
        run: . $PRELUDE; print_system_info

      - name: Display GPU Info
        run: . $PRELUDE; print_gpu_info

      - name: Free Disk Space
        run: . $PRELUDE; free_disk_space

      - name: Setup Miniconda
        run: . $PRELUDE; setup_miniconda $HOME/miniconda

      - name: Create Conda Environment
        run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

      - name: Install ROCm AMD-SMI
        run: . $PRELUDE; install_rocm_amdsmi_ubuntu $BUILD_ENV

      - name: Install Build Tools
        run: . $PRELUDE; install_build_tools $BUILD_ENV

      - name: Install PyTorch-ROCm
        run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ github.event.inputs.pytorch-channel-version || 'nightly' }} rocm/${{ matrix.rocm-version }}

      # Runs after both success and failure of the earlier steps, but is
      # guarded so that it is a no-op when conda is not available
      - name: Collect PyTorch Environment Info
        if: ${{ success() || failure() }}
        run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi

      - name: Install FBGEMM_GPU-ROCm
        run: . $PRELUDE; install_fbgemm_gpu_pip $BUILD_ENV ${{ github.event.inputs.fbgemm-channel-version || 'nightly' }} rocm/${{ matrix.rocm-version }}

      - name: Test with PyTest
        timeout-minutes: 60
        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV