Skip to content

[fbgemm_gpu] Migrate pip install workflows to reusable workflows #799

[fbgemm_gpu] Migrate pip install workflows to reusable workflows

[fbgemm_gpu] Migrate pip install workflows to reusable workflows #799

Workflow file for this run

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
# This workflow is used for testing the download and installation of FBGEMM_GPU
# nightly releases published to PyTorch PyPI.
name: FBGEMM_GPU PIP Install + Test

# Triggers for this workflow: pull requests against main, a daily cron
# schedule, and manual dispatch with user-selectable channel / target / variant.
on:
  # PR Trigger (enabled for regression checks and debugging)
  #
  pull_request:
    branches:
      - main

  # Cron Trigger (UTC)
  #
  # Based on the nightly releases schedule in PyTorch infrastructure, the
  # wheels are published to PyTorch PIP at around 11:30 UTC every day.  After
  # publication, it can take up to 30 minutes for the wheels to become
  # available, as the re-indexing job is scheduled to run every 30 minutes.  As
  # such, we set the PIP install + test workflow to be kicked off 4 hours after
  # the publish job is kicked off to give ample time for the nightly wheel to
  # be available in PyTorch PIP.
  #
  schedule:
    - cron: '30 15 * * *'

  # Manual Trigger
  #
  workflow_dispatch:
    inputs:
      pytorch-channel-version:
        description: PyTorch Version (e.g. '2.8.0', 'nightly', 'test')
        type: string
        required: true
        default: "nightly"
      fbgemm-channel-version:
        description: FBGEMM-GPU Channel + Version (e.g. '1.3.0', 'nightly', 'test/1.3.0')
        type: string
        required: true
        default: "nightly"
      fbgemm-build-target:
        description: FBGEMM-GPU Build Target
        type: choice
        required: true
        options: [ "default", "genai" ]
        # The default of a choice input must be one of its options; the
        # previous value ("cpu") is a build *variant*, not a build target.
        default: "default"
      fbgemm-build-variant:
        description: FBGEMM-GPU Variant
        type: choice
        required: true
        options: [ "cpu", "cuda", "rocm" ]
        default: "cpu"
jobs:
generate-test-matrix-cuda:
  # Calls the reusable matrix-generation workflow for the CUDA variant.  Runs
  # only in the pytorch org, on the cron schedule or on a manual dispatch that
  # selected the 'cuda' variant.
  if: >-
    ${{ github.repository_owner == 'pytorch' &&
    (github.event_name == 'schedule' ||
    (github.event_name == 'workflow_dispatch' &&
    github.event.inputs.fbgemm-build-variant == 'cuda')) }}
  uses: ./.github/workflows/_fbgemm_gpu_generate_ci_matrix.yml
  with:
    repo-owner: ${{ github.repository_owner }}
    # Scheduled runs use the 'nightly' ref; all other runs use the triggering ref
    repo-ref: ${{ (github.event_name == 'schedule' && 'nightly') || github.ref }}
    # Falls back to both targets when no dispatch input is present
    targets: ${{ github.event.inputs.fbgemm-build-target || 'default,genai' }}
    variant: cuda
    jobtype: test
test-pypi-install-cuda:
  # Calls the reusable CUDA test workflow with the matrix produced by the
  # generate-test-matrix-cuda job.  Runs only in the pytorch org, on the cron
  # schedule or on a manual dispatch that selected the 'cuda' variant.
  if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm-build-variant == 'cuda')) }}
  needs: generate-test-matrix-cuda
  uses: ./.github/workflows/_fbgemm_gpu_cuda_test.yml
  with:
    matrix: ${{ needs.generate-test-matrix-cuda.outputs.matrix }}
    # Scheduled runs use the 'nightly' ref; all other runs use the triggering ref
    repo-ref: ${{ (github.event_name == 'schedule' && 'nightly') || github.ref }}
    pytorch-channel-version: ${{ github.event.inputs.pytorch-channel-version || 'nightly' }}
    fbgemm-channel-version: ${{ github.event.inputs.fbgemm-channel-version || 'nightly' }}
    # NOTE(review): GitHub Actions rejected this workflow with "Invalid input,
    # installation-source is not defined in the referenced workflow"
    # (.github/workflows/fbgemm_gpu_pip.yml, Line: 78, Col: 28).  The input must
    # be declared under `on.workflow_call.inputs` in
    # _fbgemm_gpu_cuda_test.yml before this workflow will validate - TODO
    # confirm the declaration has landed.
    installation-source: pip
    # JSON object passed as a string to the reusable workflow
    extra-env: >-
      {
        "ENFORCE_CUDA_DEVICE": 1
      }
test_pypi_install_cpu:
  # Installs PyTorch-CPU and FBGEMM_GPU-CPU from PIP inside an Amazon Linux
  # 2023 container and runs the test suite, across x86 and ARM hosts and all
  # listed Python versions.  Runs only in the pytorch org, on the cron schedule
  # or on a manual dispatch that selected the 'cpu' variant.
  if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm-build-variant == 'cpu')) }}
  runs-on: ${{ matrix.host-machine.instance }}
  container:
    image: amazonlinux:2023
    options: --user root
  defaults:
    run:
      shell: bash
  env:
    # Script sourced at the start of every step to load the helper functions
    PRELUDE: .github/scripts/setup_env.bash
    BUILD_ENV: test_install
    BUILD_VARIANT: cpu
  strategy:
    fail-fast: false
    matrix:
      # `timeout` is consumed by the "Test with PyTest" step (in minutes)
      host-machine: [
        { arch: x86, instance: "linux.4xlarge", timeout: 20 },
        { arch: arm, instance: "linux.arm64.m7g.4xlarge", timeout: 30 },
      ]
      python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]

  steps:
    - name: Setup Build Container
      run: yum update -y; yum install -y binutils findutils git pciutils sudo wget which

    - name: Checkout the Repository
      uses: actions/checkout@v4

    - name: Display System Info
      run: . $PRELUDE; print_system_info; print_ec2_info

    - name: Display GPU Info
      run: . $PRELUDE; print_gpu_info

    - name: Setup Miniconda
      run: . $PRELUDE; setup_miniconda $HOME/miniconda

    - name: Create Conda Environment
      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

    - name: Install C/C++ Compilers for Updated LIBGCC
      # The matrix defines no `compiler` key, so `matrix.compiler` expanded to
      # an empty argument.  Name the compiler explicitly instead.
      # NOTE(review): gcc is presumed to match what install_cxx_compiler picks
      # when given no compiler argument - confirm against setup_env.bash.
      run: . $PRELUDE; install_cxx_compiler $BUILD_ENV gcc

    - name: Install Build Tools
      run: . $PRELUDE; install_build_tools $BUILD_ENV

    - name: Install PyTorch-CPU
      run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ github.event.inputs.pytorch-channel-version || 'nightly' }} cpu

    - name: Collect PyTorch Environment Info
      # Run even when an earlier step failed, but not when the job was cancelled
      if: ${{ success() || failure() }}
      run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi

    - name: Install FBGEMM_GPU-CPU
      run: . $PRELUDE; install_fbgemm_gpu_pip $BUILD_ENV ${{ github.event.inputs.fbgemm-channel-version || 'nightly' }} cpu

    - name: Test with PyTest
      timeout-minutes: ${{ matrix.host-machine.timeout }}
      run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV
# test_pypi_install_cuda:
# if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm-build-variant == 'cuda')) }}
# runs-on: ${{ matrix.host-machine.instance }}
# defaults:
# run:
# shell: bash
# env:
# PRELUDE: .github/scripts/setup_env.bash
# BUILD_ENV: test_install
# BUILD_VARIANT: cuda
# BUILD_CUDA_VERSION: ${{ matrix.cuda-version }}
# ENFORCE_CUDA_DEVICE: 1
# strategy:
# fail-fast: false
# matrix:
# host-machine: [
# { instance: "linux.g5.4xlarge.nvidia.gpu" },
# ]
# python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
# cuda-version: [ "12.6.3", "12.8.1", "12.9.1" ]
# steps:
# # Cannot upgrade to actions/checkout@v4 yet because GLIBC on the instance is too old
# - name: Checkout the Repository
# uses: actions/checkout@v4
# - name: Install NVIDIA Drivers and NVIDIA-Docker Runtime
# uses: pytorch/test-infra/.github/actions/setup-nvidia@main
# - name: Display System Info
# run: . $PRELUDE; print_system_info; print_ec2_info
# - name: Display GPU Info
# run: . $PRELUDE; print_gpu_info
# - name: Setup Miniconda
# run: . $PRELUDE; setup_miniconda $HOME/miniconda
# - name: Create Conda Environment
# run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
# - name: Install C/C++ Compilers for Updated LIBGCC
# # NOTE: gcc is required for torch dynamo to work properly, as some of
# # the compilation flags used by torch dynamo are gcc-specific:
# #
# # clang-16: error: unknown argument: '-fno-tree-loop-vectorize'
# run: . $PRELUDE; install_cxx_compiler $BUILD_ENV gcc
# - name: Install Build Tools
# run: . $PRELUDE; install_build_tools $BUILD_ENV
# - name: Install CUDA
# run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}
# - name: Install PyTorch-CUDA
# run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ github.event.inputs.pytorch-channel-version || 'nightly' }} cuda/${{ matrix.cuda-version }}
# - name: Collect PyTorch Environment Info
# if: ${{ success() || failure() }}
# run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi
# - name: Install FBGEMM_GPU-CUDA
# run: . $PRELUDE; install_fbgemm_gpu_pip $BUILD_ENV ${{ github.event.inputs.fbgemm-channel-version || 'nightly' }} cuda/${{ matrix.cuda-version }}
# - name: Test with PyTest
# timeout-minutes: 60
# run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV
test_pypi_install_rocm:
  # Installs PyTorch-ROCm and FBGEMM_GPU-ROCm from PIP inside a ROCm dev
  # container and runs the test suite for every Python / ROCm version pair.
  # Runs only in the pytorch org, on the cron schedule or on a manual dispatch
  # that selected the 'rocm' variant.
  if: >-
    ${{ github.repository_owner == 'pytorch' &&
    (github.event_name == 'schedule' ||
    (github.event_name == 'workflow_dispatch' &&
    github.event.inputs.fbgemm-build-variant == 'rocm')) }}
  runs-on: ${{ matrix.host-machine.instance }}
  container:
    image: 'rocm/dev-ubuntu-22.04:${{ matrix.rocm-version }}-complete'
    # Run as root and expose the AMD GPU device nodes to the container
    options: >-
      --user root
      --device=/dev/kfd
      --device=/dev/dri
      --ipc=host
      --shm-size 16G
      --group-add video
      --cap-add=SYS_PTRACE
      --security-opt seccomp=unconfined
  defaults:
    run:
      shell: bash
  env:
    # Script sourced at the start of every step to load the helper functions
    PRELUDE: .github/scripts/setup_env.bash
    BUILD_ENV: test_install
    BUILD_VARIANT: rocm
    ENFORCE_ROCM_DEVICE: 1
  strategy:
    fail-fast: false
    matrix:
      host-machine:
        - arch: x86
          instance: "linux.rocm.gpu.2"
      # ROCm machines are limited.
      # NOTE(review): this list currently holds the same five versions as the
      # CPU job rather than a subset - trim it if runner capacity is a concern.
      python-version:
        - "3.9"
        - "3.10"
        - "3.11"
        - "3.12"
        - "3.13"
      rocm-version:
        - "6.3"
        - "6.4"

  steps:
    - name: Setup Build Container
      run: |
        apt update -y
        apt install -y git wget
        git config --global --add safe.directory '*'

    - name: Checkout the Repository
      uses: actions/checkout@v4

    - name: Display System Info
      run: . $PRELUDE; print_system_info

    - name: Display GPU Info
      run: . $PRELUDE; print_gpu_info

    - name: Free Disk Space
      run: . $PRELUDE; free_disk_space

    - name: Setup Miniconda
      run: . $PRELUDE; setup_miniconda $HOME/miniconda

    - name: Create Conda Environment
      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

    - name: Install ROCm AMD-SMI
      run: . $PRELUDE; install_rocm_amdsmi_ubuntu $BUILD_ENV

    - name: Install Build Tools
      run: . $PRELUDE; install_build_tools $BUILD_ENV

    - name: Install PyTorch-ROCm
      run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ github.event.inputs.pytorch-channel-version || 'nightly' }} rocm/${{ matrix.rocm-version }}

    - name: Collect PyTorch Environment Info
      # Run even when an earlier step failed, but not when the job was cancelled
      if: ${{ success() || failure() }}
      run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi

    - name: Install FBGEMM_GPU-ROCm
      run: . $PRELUDE; install_fbgemm_gpu_pip $BUILD_ENV ${{ github.event.inputs.fbgemm-channel-version || 'nightly' }} rocm/${{ matrix.rocm-version }}

    - name: Test with PyTest
      timeout-minutes: 60
      run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV