Add more server metrics for loop lag monitoring #28445

Workflow file for this run

	name: Python Tests
	on:
	# Trigger the workflow on push or pull request,
	# but only for the main branch
	push:
	branches:
	- master
	- 'releases/**'
	pull_request:
	branches:
	- master
	- 'releases/**'
	merge_group:

	jobs:
	python-test:
	strategy:
	matrix:
	python-version: ["3.8"]
	test-path:
	# Group them based on running time to save CI time and resources
	- tests/unit_tests
	- tests/test_cli.py
	- "tests/test_optimizer_dryruns.py -k \"partial\""
	- "tests/test_optimizer_dryruns.py -k \"not partial\""
	- tests/test_jobs_and_serve.py tests/test_yaml_parser.py tests/test_global_user_state.py tests/test_config.py tests/test_jobs.py tests/test_list_accelerators.py tests/test_wheels.py tests/test_api.py tests/test_storage.py tests/test_api_compatibility.py
	- tests/test_no_parellel.py
	- tests/test_optimizer_random_dag.py
	include:
	# We separate out the random DAG tests because its flaky due to catalog updates.
	# TODO(zeping): Move back to the main test group once we fixed the flakiness.
	- test-path: tests/unit_tests
	test-name: "Unit Tests"
	- test-path: tests/test_cli.py
	test-name: "CLI Tests"
	- test-path: "tests/test_optimizer_dryruns.py -k \"partial\""
	test-name: "Optimizer Dryruns Part 1"
	- test-path: "tests/test_optimizer_dryruns.py -k \"not partial\""
	test-name: "Optimizer Dryruns Part 2"
	- test-path: tests/test_jobs_and_serve.py tests/test_yaml_parser.py tests/test_global_user_state.py tests/test_config.py tests/test_jobs.py tests/test_list_accelerators.py tests/test_wheels.py tests/test_api.py tests/test_storage.py tests/test_api_compatibility.py
	test-name: "Jobs, Serve, Wheels, API, Config, Optimizer & Storage Tests"
	- test-path: tests/test_no_parellel.py
	test-name: "No Parallel Tests"
	- test-path: tests/test_optimizer_random_dag.py
	test-name: "Optimizer Random DAG Tests"
	runs-on: ubuntu-latest
	name: "Python Tests - ${{ matrix.test-name }}"
	steps:
	- name: Checkout repository
	uses: actions/checkout@v3
	- name: Install the latest version of uv
	uses: astral-sh/setup-uv@v4
	with:
	version: "latest"
	python-version: ${{ matrix.python-version }}
	- name: Install dependencies
	run: \|
	uv venv --seed ~/test-env
	source ~/test-env/bin/activate
	uv pip install --prerelease=allow "azure-cli>=2.65.0"
	# Use -e to include examples and tests folder in the path for unit
	# tests to access them.
	uv pip install -e ".[all]"
	uv pip install pytest pytest-xdist pytest-env>=0.6 pytest-asyncio memory-profiler==0.61.0
	- name: Run tests with pytest
	env:
	TEST_PATH: ${{ matrix.test-path }}
	TEST_NAME: ${{ matrix.test-name }}
	run: \|
	source ~/test-env/bin/activate
	if [[ "$TEST_NAME" == "No Parallel Tests" ]]; then
	SKYPILOT_DISABLE_USAGE_COLLECTION=1 SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK=1 eval "pytest -n 0 --dist no $TEST_PATH"
	else
	SKYPILOT_DISABLE_USAGE_COLLECTION=1 SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK=1 eval "pytest -n 4 --dist worksteal $TEST_PATH"
	fi

	limited-deps-test:
	# Test with limited dependencies to ensure cloud module imports don't break
	# basic functionality (e.g., importing IBM shouldn't break AWS support)
	runs-on: ubuntu-latest
	strategy:
	matrix:
	python-version: ["3.8"]
	test-path:
	- tests/test_jobs_and_serve.py tests/test_cli.py
	include:
	- test-path: tests/test_jobs_and_serve.py tests/test_cli.py
	test-name: "Limited Deps - Jobs, Serve & CLI"
	name: "Python Tests - ${{ matrix.test-name }}"
	steps:
	- name: Checkout repository
	uses: actions/checkout@v3
	- name: Install the latest version of uv
	uses: astral-sh/setup-uv@v4
	with:
	version: "latest"
	python-version: ${{ matrix.python-version }}
	- name: Install limited dependencies
	run: \|
	uv venv --seed ~/test-env
	source ~/test-env/bin/activate
	uv pip install --prerelease=allow "azure-cli>=2.65.0"
	# Install limited dependencies only
	uv pip install -e ".[kubernetes,aws,gcp,azure]"
	uv pip install pytest pytest-xdist pytest-env>=0.6 pytest-asyncio memory-profiler==0.61.0
	- name: Run tests with pytest
	env:
	TEST_PATH: ${{ matrix.test-path }}
	run: \|
	source ~/test-env/bin/activate
	SKYPILOT_DISABLE_USAGE_COLLECTION=1 SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK=1 eval "pytest -n 0 --dist no $TEST_PATH"

	failover-test:
	# Test failover functionality
	runs-on: ubuntu-latest
	strategy:
	matrix:
	# moto requires >=3.9
	python-version: ["3.10"]
	test-path:
	- tests/test_failover.py
	include:
	- test-path: tests/test_failover.py
	test-name: "Failover Tests"
	name: "Python Tests - ${{ matrix.test-name }}"
	steps:
	- name: Checkout repository
	uses: actions/checkout@v3
	- name: Install the latest version of uv
	uses: astral-sh/setup-uv@v4
	with:
	version: "latest"
	python-version: ${{ matrix.python-version }}
	- name: Install dependencies
	run: \|
	uv venv --seed ~/test-env
	source ~/test-env/bin/activate
	uv pip install --prerelease=allow "azure-cli>=2.65.0"
	# Use -e to include examples and tests folder in the path for unit
	# tests to access them.
	uv pip install -e ".[all]"
	uv pip install pytest pytest-xdist pytest-env>=0.6 pytest-asyncio memory-profiler==0.61.0 moto==5.1.2
	- name: Run tests with pytest
	run: \|
	source ~/test-env/bin/activate
	SKYPILOT_DISABLE_USAGE_COLLECTION=1 SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK=1 pytest -n 4 --dist worksteal ${{ matrix.test-path }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Add more server metrics for loop lag monitoring #28445

Workflow file

Add more server metrics for loop lag monitoring #28445

Uh oh!

Jobs

Run details

Workflow file for this run