BioImage.IO Model Testing #20

Workflow file for this run

.github/workflows/model-testing.yml at 3bb7bba

	# Tests BioImage.IO models: daily on a schedule, as a dry run on a small fixed
	# model subset for pull requests, and on manual dispatch with optional inputs.
	name: BioImage.IO Model Testing

	on:
	  schedule:
	    # Run daily at 2:00 AM UTC
	    - cron: '0 2 * * *'
	  pull_request:
	    # Run on pull requests to test changes
	    types: [opened, synchronize, reopened]
	  workflow_dispatch:
	    inputs:
	      skip_exists:
	        description: 'Skip models that already have test results'
	        required: false
	        default: 'true'
	        type: choice
	        options:
	          - 'true'
	          - 'false'
	      model_ids:
	        description: 'Specific model IDs to test (comma-separated, leave empty for all)'
	        required: false
	        type: string
	      dry_run:
	        description: "Run in dry-run mode (don't update artifacts)"
	        required: false
	        default: false
	        type: boolean

	jobs:
	  test-bioimageio-models:
	    runs-on: ubuntu-latest
	    # 1 hour for pull requests, 8 hours for every other trigger
	    timeout-minutes: ${{ github.event_name == 'pull_request' && 60 || 480 }}

	    steps:
	      - name: Checkout repository
	        uses: actions/checkout@v4

	      - name: Set up Python
	        uses: actions/setup-python@v4
	        with:
	          python-version: '3.11'

	      - name: Cache pip dependencies
	        uses: actions/cache@v3
	        with:
	          path: ~/.cache/pip
	          # NOTE(review): the install step below does not read these
	          # requirements files; confirm they exist, otherwise the hash is
	          # empty and this key never changes.
	          key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt', 'requirements-dev.txt') }}
	          restore-keys: |
	            ${{ runner.os }}-pip-

	      - name: Install dependencies
	        run: |
	          python -m pip install --upgrade pip
	          pip install hypha-rpc numpy httpx pydantic

	      - name: Create test results directory
	        run: mkdir -p bioimageio_test_reports

	      - name: Run BioImage.IO model tests
	        env:
	          HYPHA_TOKEN: ${{ secrets.HYPHA_TOKEN }}
	          # Inputs reach the script as environment variables instead of being
	          # pasted into its source, so their contents are never run as shell code.
	          INPUT_SKIP_EXISTS: ${{ github.event.inputs.skip_exists }}
	          INPUT_MODEL_IDS: ${{ github.event.inputs.model_ids }}
	          INPUT_DRY_RUN: ${{ github.event.inputs.dry_run }}
	        run: |
	          cd scripts

	          # Collect the command arguments in an array so each one is passed
	          # as a single word and is never glob-expanded.
	          ARGS=()

	          case "$GITHUB_EVENT_NAME" in
	            pull_request)
	              # For pull requests, always run in dry-run mode to avoid updating artifacts
	              echo "Running in dry-run mode for pull request"
	              ARGS+=(--skip-exists --dry-run)

	              # For PR testing, limit to a small subset of models for faster feedback
	              # You can modify this list to include specific models for PR testing
	              echo "Using limited model set for pull request testing"
	              ARGS+=(--model-ids affable-shark chatty-frog zealous-oxen)
	              ;;

	            schedule)
	              # For scheduled runs, always use skip-exists to avoid re-testing
	              ARGS+=(--skip-exists)
	              ;;

	            *)
	              # Manual runs (workflow_dispatch) are the only trigger that carries inputs

	              # Handle skip_exists argument
	              if [ "$INPUT_SKIP_EXISTS" = "false" ]; then
	                ARGS+=(--no-skip-exists)
	              else
	                ARGS+=(--skip-exists)
	              fi

	              # Handle model_ids argument: turn the comma-separated list into
	              # separate words; the flag is omitted when no ID remains
	              read -r -a MODEL_IDS <<< "${INPUT_MODEL_IDS//,/ }"
	              if [ "${#MODEL_IDS[@]}" -gt 0 ]; then
	                ARGS+=(--model-ids "${MODEL_IDS[@]}")
	              fi

	              # Handle dry_run argument
	              if [ "$INPUT_DRY_RUN" = "true" ]; then
	                ARGS+=(--dry-run)
	              fi
	              ;;
	          esac

	          echo "Running: python bioimageio_test_reports.py ${ARGS[*]}"
	          python bioimageio_test_reports.py "${ARGS[@]}"

	      - name: Upload test results as artifact
	        uses: actions/upload-artifact@v4
	        if: always()
	        with:
	          name: bioimageio-test-results-${{ github.run_number }}
	          # NOTE(review): the test step runs from scripts/ - confirm the script
	          # writes its reports into this repository-root directory.
	          path: bioimageio_test_reports/
	          retention-days: 30

	      - name: Generate summary report
	        if: always()
	        run: |
	          {
	            echo "# BioImage.IO Test Results Summary"
	            echo ""

	            # Add dry-run notice for pull requests
	            if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
	              echo "🧪 Pull Request Test Mode"
	              echo "- Running in DRY-RUN mode (no artifacts updated)"
	              echo "- Testing limited model subset for faster feedback"
	              echo "- Using existing test results where available"
	              echo ""
	            fi
	          } >> "$GITHUB_STEP_SUMMARY"

	          # Use the analysis function to get statistics
	          cd scripts
	          ANALYSIS_OUTPUT=$(python3 bioimageio_test_reports.py --analyze-results)
	          echo "$ANALYSIS_OUTPUT"

	          # Parse the analysis output
	          # NOTE(review): eval runs whatever the script prints as shell code;
	          # presumably it prints only NAME=value assignments - confirm.
	          eval "$ANALYSIS_OUTPUT"

	          # Treat a missing count as zero so the numeric test below cannot error out
	          TOTAL_MODELS="${TOTAL_MODELS:-0}"

	          {
	            if [ "$TOTAL_MODELS" -gt 0 ]; then
	              echo "Total models tested: $TOTAL_MODELS"
	              echo ""

	              # Main results table
	              echo "| Test Type | Passed | Total | Rate |"
	              echo "|-----------|--------|-------|------|"
	              echo "| RDF Validation | $PASSED_RDF | $TOTAL_MODELS | ${RDF_RATE}% |"
	              echo "| Model Test Run | $PASSED_MODEL | $TOTAL_MODELS | ${MODEL_RATE}% |"
	              echo "| Reproduce Outputs | $PASSED_REPRODUCE | $TOTAL_MODELS | ${REPRODUCE_RATE}% |"

	              echo ""

	              # Score and performance metrics
	              echo "## Performance Metrics"
	              echo ""
	              echo "| Metric | Value |"
	              echo "|--------|-------|"
	              echo "| Total Score | $TOTAL_SCORE |"
	              echo "| Average Score per Model | ${AVERAGE_SCORE} |"

	              # Add execution time info if available
	              if [ -n "$TOTAL_EXECUTION_TIME" ] && [ "$TOTAL_EXECUTION_TIME" != "0.00" ]; then
	                echo "| Total Execution Time | ${TOTAL_EXECUTION_TIME}s |"
	                echo "| Average Execution Time | ${AVERAGE_EXECUTION_TIME}s |"
	              fi

	              echo ""
	              echo "Test completed at: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"

	              # Add score explanation
	              echo ""
	              echo "### Scoring System"
	              echo "- Each model gets 1 point for each test that passes (max 3 points per model)"
	              echo "- Collection score: 3 points per model that passes all tests"
	              echo "- Perfect score would be: $((TOTAL_MODELS * 3)) points"
	            else
	              echo "No test results found."
	            fi
	          } >> "$GITHUB_STEP_SUMMARY"

	      - name: Comment on failure
	        if: failure()
	        run: |
	          {
	            echo "# ❌ BioImage.IO Tests Failed"
	            echo ""
	            echo "The daily BioImage.IO model testing workflow has failed."
	            echo "Please check the logs for more details."
	            echo ""
	            echo "Failed at: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
	          } >> "$GITHUB_STEP_SUMMARY"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

BioImage.IO Model Testing #20

Workflow file

BioImage.IO Model Testing #20

Uh oh!

Jobs

Run details

Workflow file for this run