Fix tensors_have_same_dim_order for degenerate shapes (semantic equivalence) #9530
Workflow file for this run
# Test ExecuTorch CUDA Build Compatibility
# This workflow tests whether ExecuTorch can be successfully built with CUDA support
# across different CUDA versions (12.6, 12.8, 12.9, 13.0) using the command:
#   ./install_executorch.sh
#
# Note: ExecuTorch automatically detects the system CUDA version using nvcc and
# installs the appropriate PyTorch wheel. No manual CUDA/PyTorch installation is needed.
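#
# For reference, the detection can be reproduced locally with something like the
# following (an illustrative sketch, not part of this workflow; assumes nvcc is on PATH):
#   nvcc --version | sed -n 's/.*release \([0-9.]*\).*/\1/p'   # prints e.g. "12.6"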

name: Test CUDA Builds

on:
  pull_request:
  push:
    branches:
      - main
      - release/*

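# Runs are grouped per pull request (pushes fall back to the commit SHA); the two
# trailing event-name terms would put workflow_dispatch and schedule runs in their
# own groups, though this workflow only triggers on pull_request and push.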
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: false

jobs:
  test-cuda-builds:
    strategy:
      fail-fast: false
      matrix:
        cuda-version: ["12.6", "12.8", "12.9", "13.0"]
    name: test-executorch-cuda-build-${{ matrix.cuda-version }}
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    with:
      timeout: 90
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: ${{ matrix.cuda-version }}
      use-custom-docker-registry: false
      submodules: recursive
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux
        # Test the ExecuTorch CUDA build - ExecuTorch will automatically detect
        # the CUDA version and install the appropriate PyTorch wheel
        source .ci/scripts/test-cuda-build.sh "${{ matrix.cuda-version }}"

  # This job fails if any of the CUDA builds above failed.
  check-all-cuda-builds:
    needs: test-cuda-builds
    runs-on: ubuntu-latest
    if: always()
    steps:
      - name: Check if all CUDA builds succeeded
        run: |
          if [[ "${{ needs.test-cuda-builds.result }}" != "success" ]]; then
            echo "ERROR: One or more ExecuTorch CUDA builds failed!"
            echo "CUDA build results: ${{ needs.test-cuda-builds.result }}"
            exit 1
          else
            echo "SUCCESS: All ExecuTorch CUDA builds (12.6, 12.8, 12.9, 13.0) completed successfully!"
          fi
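
  # Export and run a small set of models with the CUDA backend via the CMake flow.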
  test-models-cuda:
    name: test-models-cuda
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    strategy:
      fail-fast: false
      matrix:
        model: [linear, add, add_mul, resnet18, conv1d, sdpa, mv2, mv3]
    with:
      timeout: 90
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: 12.6
      use-custom-docker-registry: false
      submodules: recursive
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux
        PYTHON_EXECUTABLE=python ./install_executorch.sh
        export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
        PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
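
  # Build the CUDA backend, then run its C++ shim tests and Python unit tests.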
  unittest-cuda:
    name: unittest-cuda
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    with:
      timeout: 90
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: 12.6
      use-custom-docker-registry: false
      submodules: recursive
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux
        # Install ExecuTorch in editable mode so custom op libs land in-tree
        bash ./install_executorch.sh
        # Build ExecuTorch with CUDA support
        cmake --workflow --preset llm-release-cuda
        # Build and run the CUDA shim tests (C++)
        pushd backends/cuda/runtime/shims/tests
        cmake --workflow --preset default
        popd
        # Run the CUDA backend Python tests; override addopts so that we don't
        # run every test configured in pytest.ini
        python -m pytest backends/cuda/tests backends/cuda/passes/tests -v -o "addopts="
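
  # Export HuggingFace models with the CUDA backend and upload the resulting
  # artifacts for the e2e and pybind jobs below.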
  export-model-cuda-artifact:
    name: export-model-cuda-artifact
    # Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
    if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    secrets: inherit
    strategy:
      fail-fast: false
      matrix:
        model:
          - repo: "mistralai"
            name: "Voxtral-Mini-3B-2507"
          - repo: "openai"
            name: "whisper-small"
          - repo: "openai"
            name: "whisper-large-v3-turbo"
          - repo: "google"
            name: "gemma-3-4b-it"
          - repo: "nvidia"
            name: "parakeet-tdt"
        quant:
          - "non-quantized"
          - "quantized-int4-tile-packed"
          - "quantized-int4-weight-only"
        exclude:
          # TODO: enable int4-weight-only on gemma3.
          - model:
              repo: "google"
              name: "gemma-3-4b-it"
            quant: "quantized-int4-weight-only"
    with:
      timeout: 90
      secrets-env: EXECUTORCH_HF_TOKEN
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: 12.6
      use-custom-docker-registry: false
      submodules: recursive
      upload-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-${{ matrix.quant }}
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux
        echo "::group::Setup ExecuTorch"
        # Disable MKL to avoid a duplicate-target error when conda has multiple MKL installations
        export USE_MKL=OFF
        ./install_executorch.sh
        echo "::endgroup::"

        # Set up Huggingface only for models that need it (not parakeet)
        if [ "${{ matrix.model.name }}" != "parakeet-tdt" ]; then
          echo "::group::Setup Huggingface"
          pip install -U "huggingface_hub[cli]<1.0" accelerate
          huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
          OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
          pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
          echo "::endgroup::"
        fi

        source .ci/scripts/export_model_artifact.sh cuda "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}"
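
  # Download the exported artifacts and run each model end to end on CUDA.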
  test-model-cuda-e2e:
    name: test-model-cuda-e2e
    needs: export-model-cuda-artifact
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    strategy:
      fail-fast: false
      matrix:
        model:
          - repo: "mistralai"
            name: "Voxtral-Mini-3B-2507"
          - repo: "openai"
            name: "whisper-small"
          - repo: "openai"
            name: "whisper-large-v3-turbo"
          - repo: "google"
            name: "gemma-3-4b-it"
          - repo: "nvidia"
            name: "parakeet-tdt"
        quant:
          - "non-quantized"
          - "quantized-int4-tile-packed"
          - "quantized-int4-weight-only"
        exclude:
          # TODO: enable int4-weight-only on gemma3.
          - model:
              repo: "google"
              name: "gemma-3-4b-it"
            quant: "quantized-int4-weight-only"
    with:
      timeout: 90
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: 12.6
      use-custom-docker-registry: false
      submodules: recursive
      download-artifact: ${{ matrix.model.repo }}-${{ matrix.model.name }}-cuda-${{ matrix.quant }}
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        source .ci/scripts/test_model_e2e.sh cuda "${{ matrix.model.repo }}/${{ matrix.model.name }}" "${{ matrix.quant }}" "${RUNNER_ARTIFACT_DIR}"

  test-cuda-pybind:
    name: test-cuda-pybind
    needs: export-model-cuda-artifact
    # This job downloads models exported by export-model-cuda-artifact and runs them using pybind.
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    secrets: inherit
    strategy:
      fail-fast: false
      matrix:
        model: ["gemma3-4b"]
        quantize: ["", "--quantize"]
    with:
      timeout: 120
      secrets-env: EXECUTORCH_HF_TOKEN
      # The expression below acts as a ternary: an empty matrix.quantize selects the
      # non-quantized artifact, while "--quantize" selects the int4-tile-packed one.
      download-artifact: google-gemma-3-4b-it-cuda-${{ matrix.quantize && 'quantized-int4-tile-packed' || 'non-quantized' }}
      runner: linux.g5.4xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: 12.6
      use-custom-docker-registry: false
      submodules: recursive
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux
        echo "::group::Setup ExecuTorch"
        # Disable MKL to avoid a duplicate-target error when conda has multiple MKL installations
        export USE_MKL=OFF
        ./install_executorch.sh
        echo "::endgroup::"

        echo "::group::Fix libstdc++ GLIBCXX version"
        # The embedded .so files in the CUDA blob require GLIBCXX_3.4.29,
        # which the default conda libstdc++ doesn't have. Install a newer
        # libstdc++ from conda-forge and pick it up via LD_LIBRARY_PATH.
        conda install -y -c conda-forge 'libstdcxx-ng>=12'
        export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
        # Verify the new libstdc++ provides GLIBCXX_3.4.29
        strings /opt/conda/lib/libstdc++.so.6 | grep GLIBCXX_3.4.29 || {
          echo "Error: GLIBCXX_3.4.29 not found in /opt/conda/lib/libstdc++.so.6"
          exit 1
        }
        echo "::endgroup::"

        echo "::group::Setup Huggingface"
        pip install -U "huggingface_hub[cli]<1.0"
        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
        echo "::endgroup::"

        echo "::group::Install optimum-executorch"
        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
        pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
        echo "::endgroup::"

        echo "::group::Test CUDA Multimodal: ${{ matrix.model }} ${{ matrix.quantize }}"
        python .ci/scripts/test_huggingface_optimum_model.py \
          --model ${{ matrix.model }} \
          --recipe cuda \
          --model_dir "${RUNNER_ARTIFACT_DIR}" \
          --run_only \
          ${{ matrix.quantize }}
        echo "::endgroup::"