Skip to content

Add more server metrics for loop lag monitoring #28445

Add more server metrics for loop lag monitoring

Add more server metrics for loop lag monitoring #28445

Workflow file for this run

name: Python Tests
# Run on pushes to, and pull requests targeting, master and the release
# branches, and on merge-queue (merge_group) events.
on:
  push:
    branches: [master, 'releases/**']
  pull_request:
    branches: [master, 'releases/**']
  merge_group:
jobs:
python-test:
  # Main test suite. Each test-path entry in the matrix runs as its own job.
  strategy:
    matrix:
      python-version: ["3.8"]
      test-path:
        # Group them based on running time to save CI time and resources
        - tests/unit_tests
        - tests/test_cli.py
        - 'tests/test_optimizer_dryruns.py -k "partial"'
        - 'tests/test_optimizer_dryruns.py -k "not partial"'
        - tests/test_jobs_and_serve.py tests/test_yaml_parser.py tests/test_global_user_state.py tests/test_config.py tests/test_jobs.py tests/test_list_accelerators.py tests/test_wheels.py tests/test_api.py tests/test_storage.py tests/test_api_compatibility.py
        - tests/test_no_parellel.py
        # We separate out the random DAG tests because they are flaky due to
        # catalog updates.
        # TODO(zeping): Move back to the main test group once we fixed the flakiness.
        - tests/test_optimizer_random_dag.py
      include:
        # Each entry must repeat a test-path from the list above verbatim; it
        # attaches the display name used in the job title. The run step below
        # also compares test-name against "No Parallel Tests", so keep that
        # string in sync with the script.
        - test-path: tests/unit_tests
          test-name: "Unit Tests"
        - test-path: tests/test_cli.py
          test-name: "CLI Tests"
        - test-path: 'tests/test_optimizer_dryruns.py -k "partial"'
          test-name: "Optimizer Dryruns Part 1"
        - test-path: 'tests/test_optimizer_dryruns.py -k "not partial"'
          test-name: "Optimizer Dryruns Part 2"
        - test-path: tests/test_jobs_and_serve.py tests/test_yaml_parser.py tests/test_global_user_state.py tests/test_config.py tests/test_jobs.py tests/test_list_accelerators.py tests/test_wheels.py tests/test_api.py tests/test_storage.py tests/test_api_compatibility.py
          test-name: "Jobs, Serve, Wheels, API, Config, Optimizer & Storage Tests"
        - test-path: tests/test_no_parellel.py
          test-name: "No Parallel Tests"
        - test-path: tests/test_optimizer_random_dag.py
          test-name: "Optimizer Random DAG Tests"
  runs-on: ubuntu-latest
  name: "Python Tests - ${{ matrix.test-name }}"
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
    - name: Install the latest version of uv
      uses: astral-sh/setup-uv@v4
      with:
        version: "latest"
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        uv venv --seed ~/test-env
        source ~/test-env/bin/activate
        uv pip install --prerelease=allow "azure-cli>=2.65.0"
        # Use -e to include examples and tests folder in the path for unit
        # tests to access them.
        uv pip install -e ".[all]"
        # The version specifier must be quoted: an unquoted `>` is a shell
        # redirection, which would drop the constraint and write the
        # installer output to a file named "=0.6".
        uv pip install pytest pytest-xdist "pytest-env>=0.6" pytest-asyncio memory-profiler==0.61.0
    - name: Run tests with pytest
      env:
        TEST_PATH: ${{ matrix.test-path }}
        TEST_NAME: ${{ matrix.test-name }}
      run: |
        source ~/test-env/bin/activate
        # eval is used so that quoted selectors inside TEST_PATH
        # (e.g. -k "not partial") are parsed as a single pytest argument.
        if [[ "$TEST_NAME" == "No Parallel Tests" ]]; then
          SKYPILOT_DISABLE_USAGE_COLLECTION=1 SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK=1 eval "pytest -n 0 --dist no $TEST_PATH"
        else
          SKYPILOT_DISABLE_USAGE_COLLECTION=1 SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK=1 eval "pytest -n 4 --dist worksteal $TEST_PATH"
        fi
limited-deps-test:
  # Test with limited dependencies to ensure cloud module imports don't break
  # basic functionality (e.g., importing IBM shouldn't break AWS support)
  runs-on: ubuntu-latest
  strategy:
    matrix:
      python-version: ["3.8"]
      test-path:
        - tests/test_jobs_and_serve.py tests/test_cli.py
      include:
        # Attaches the display name used in the job title to the single
        # test-path above; the two strings must match verbatim.
        - test-path: tests/test_jobs_and_serve.py tests/test_cli.py
          test-name: "Limited Deps - Jobs, Serve & CLI"
  name: "Python Tests - ${{ matrix.test-name }}"
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
    - name: Install the latest version of uv
      uses: astral-sh/setup-uv@v4
      with:
        version: "latest"
        python-version: ${{ matrix.python-version }}
    - name: Install limited dependencies
      run: |
        uv venv --seed ~/test-env
        source ~/test-env/bin/activate
        uv pip install --prerelease=allow "azure-cli>=2.65.0"
        # Install limited dependencies only
        uv pip install -e ".[kubernetes,aws,gcp,azure]"
        # The version specifier must be quoted: an unquoted `>` is a shell
        # redirection, which would drop the constraint and write the
        # installer output to a file named "=0.6".
        uv pip install pytest pytest-xdist "pytest-env>=0.6" pytest-asyncio memory-profiler==0.61.0
    - name: Run tests with pytest
      env:
        TEST_PATH: ${{ matrix.test-path }}
      run: |
        source ~/test-env/bin/activate
        # Runs without xdist workers (-n 0). TEST_PATH holds several
        # space-separated paths that eval splits into separate arguments.
        SKYPILOT_DISABLE_USAGE_COLLECTION=1 SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK=1 eval "pytest -n 0 --dist no $TEST_PATH"
failover-test:
  # Test failover functionality
  runs-on: ubuntu-latest
  strategy:
    matrix:
      # moto requires >=3.9
      python-version: ["3.10"]
      test-path:
        - tests/test_failover.py
      include:
        # Attaches the display name used in the job title to the test-path
        # above; the two strings must match verbatim.
        - test-path: tests/test_failover.py
          test-name: "Failover Tests"
  name: "Python Tests - ${{ matrix.test-name }}"
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
    - name: Install the latest version of uv
      uses: astral-sh/setup-uv@v4
      with:
        version: "latest"
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        uv venv --seed ~/test-env
        source ~/test-env/bin/activate
        uv pip install --prerelease=allow "azure-cli>=2.65.0"
        # Use -e to include examples and tests folder in the path for unit
        # tests to access them.
        uv pip install -e ".[all]"
        # The version specifier must be quoted: an unquoted `>` is a shell
        # redirection, which would drop the constraint and write the
        # installer output to a file named "=0.6".
        uv pip install pytest pytest-xdist "pytest-env>=0.6" pytest-asyncio memory-profiler==0.61.0 moto==5.1.2
    - name: Run tests with pytest
      env:
        # Pass the matrix value through the environment instead of expanding
        # the expression inside the script text.
        TEST_PATH: ${{ matrix.test-path }}
      run: |
        source ~/test-env/bin/activate
        # $TEST_PATH is intentionally unquoted so that space-separated paths
        # would be split into separate pytest arguments.
        SKYPILOT_DISABLE_USAGE_COLLECTION=1 SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK=1 pytest -n 4 --dist worksteal $TEST_PATH