# BioImage.IO Model Testing #26
# Workflow file for this run

name: BioImage.IO Model Testing

# Three triggers: a nightly schedule, pull-request activity, and manual dispatch.
on:
  schedule:
    # Every day at 02:00 UTC.
    - cron: '0 2 * * *'

  pull_request:
    # Exercise the workflow whenever a PR is opened, updated, or reopened.
    types:
      - opened
      - synchronize
      - reopened

  workflow_dispatch:
    # Inputs are only available for manual runs; on the other two triggers
    # `github.event.inputs.*` expands to an empty string.
    inputs:
      skip_exists:
        type: choice
        description: 'Skip models that already have test results'
        required: false
        default: 'true'
        options:
          - 'true'
          - 'false'
      model_ids:
        type: string
        description: 'Specific model IDs to test (comma-separated, leave empty for all)'
        required: false
      dry_run:
        type: boolean
        description: 'Run in dry-run mode (don''t update artifacts)'
        required: false
        default: false
jobs:
  # Single job: installs the Python dependencies, runs
  # scripts/bioimageio_test_reports.py, uploads the report directory as an
  # artifact, and writes a Markdown summary for the run page.
  test-bioimageio-models:
    runs-on: ubuntu-latest
    # Pull requests get 1 hour. Every other trigger gets 6 hours, which is the
    # longest a job may run on a GitHub-hosted runner (a larger value would
    # never take effect).
    timeout-minutes: ${{ github.event_name == 'pull_request' && 60 || 360 }}
    # GITHUB_TOKEN is only needed by the checkout step; it is not passed to
    # any script below, so read-only repository access is sufficient.
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Cache pip dependencies
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          # hashFiles() yields an empty string when neither file exists, in
          # which case the key degrades to "<os>-pip-".
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt', 'requirements-dev.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install hypha-rpc numpy httpx pydantic

      - name: Create test results directory
        run: mkdir -p bioimageio_test_reports

      - name: Run BioImage.IO model tests
        # Bash is required for the array handling below.
        shell: bash
        env:
          HYPHA_TOKEN: ${{ secrets.HYPHA_TOKEN }}
          # Manual-run inputs reach the script as environment variables rather
          # than being expanded into the script text, so their content can
          # never be interpreted as shell code. All three are empty strings
          # for schedule and pull_request events.
          DISPATCH_SKIP_EXISTS: ${{ github.event.inputs.skip_exists }}
          DISPATCH_MODEL_IDS: ${{ github.event.inputs.model_ids }}
          DISPATCH_DRY_RUN: ${{ github.event.inputs.dry_run }}
        run: |
          cd scripts

          # Settings used when no manual input overrides them.
          skip_exists=true
          dry_run=false
          model_ids=()

          # Manual overrides (workflow_dispatch only).
          if [ "$DISPATCH_SKIP_EXISTS" = "false" ]; then
            skip_exists=false
          fi
          if [ -n "$DISPATCH_MODEL_IDS" ]; then
            # Turn the comma-separated list into separate arguments. `read -a`
            # splits on whitespace without glob-expanding the values.
            read -r -a model_ids <<< "${DISPATCH_MODEL_IDS//,/ }"
          fi
          if [ "$DISPATCH_DRY_RUN" = "true" ]; then
            dry_run=true
          fi

          case "$GITHUB_EVENT_NAME" in
            pull_request)
              # Pull requests must never update artifacts.
              dry_run=true
              skip_exists=true
              echo "Running in dry-run mode for pull request"
              # Limit PR runs to a small subset of models for faster feedback.
              # Edit this list to change which models are used for PR testing.
              if [ "${#model_ids[@]}" -eq 0 ]; then
                echo "Using limited model set for pull request testing"
                model_ids=(affable-shark chatty-frog zealous-oxen)
              fi
              ;;
            schedule)
              # Scheduled runs always skip existing results and pass nothing else.
              skip_exists=true
              dry_run=false
              model_ids=()
              ;;
          esac

          # Assemble the command line as an array so each value stays one argument.
          args=()
          if [ "$skip_exists" = "true" ]; then
            args+=(--skip-exists)
          else
            args+=(--no-skip-exists)
          fi
          if [ "$dry_run" = "true" ]; then
            args+=(--dry-run)
          fi
          if [ "${#model_ids[@]}" -gt 0 ]; then
            args+=(--model-ids "${model_ids[@]}")
          fi

          echo "Running: python bioimageio_test_reports.py ${args[*]}"
          python bioimageio_test_reports.py "${args[@]}"

      - name: Upload test results as artifact
        uses: actions/upload-artifact@v4
        # Upload whatever was produced even if the test step failed.
        if: always()
        with:
          name: bioimageio-test-results-${{ github.run_number }}
          # NOTE(review): this is the directory created at the repository root,
          # while the test script runs from scripts/ - confirm the script
          # writes its reports here.
          path: bioimageio_test_reports/
          retention-days: 30

      - name: Generate summary report
        if: always()
        run: |
          {
            echo "# BioImage.IO Test Results Summary"
            echo ""
            # Add dry-run notice for pull requests
            if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
              echo "🧪 **Pull Request Test Mode**"
              echo "- Running in DRY-RUN mode (no artifacts updated)"
              echo "- Testing limited model subset for faster feedback"
              echo "- Using existing test results where available"
              echo ""
            fi
          } >> "$GITHUB_STEP_SUMMARY"

          # The variables used below (TOTAL_MODELS, PASSED_*, *_RATE, ...) are
          # expected to be defined by evaluating the script's output.
          cd scripts
          ANALYSIS_OUTPUT=$(python3 bioimageio_test_reports.py --analyze-results)
          echo "$ANALYSIS_OUTPUT"
          # NOTE(review): eval executes whatever the script prints. On pull
          # requests that script comes from the PR branch; consider having it
          # write KEY=VALUE lines to a file and parsing those instead.
          eval "$ANALYSIS_OUTPUT"

          # Default to 0 so a missing TOTAL_MODELS selects the "no results"
          # branch instead of raising a test error.
          if [ "${TOTAL_MODELS:-0}" -gt 0 ]; then
            {
              echo "**Total models tested:** $TOTAL_MODELS"
              echo ""
              # Main results table
              echo "| Test Type | Passed | Total | Rate |"
              echo "|-----------|--------|-------|------|"
              echo "| RDF Validation | $PASSED_RDF | $TOTAL_MODELS | ${RDF_RATE}% |"
              echo "| Model Test Run | $PASSED_MODEL | $TOTAL_MODELS | ${MODEL_RATE}% |"
              echo "| Reproduce Outputs | $PASSED_REPRODUCE | $TOTAL_MODELS | ${REPRODUCE_RATE}% |"
              echo ""
              # Score and performance metrics
              echo "## Performance Metrics"
              echo ""
              echo "| Metric | Value |"
              echo "|--------|-------|"
              echo "| **Total Score** | $TOTAL_SCORE |"
              echo "| **Average Score per Model** | ${AVERAGE_SCORE} |"
              # Add execution time info if available
              if [ -n "$TOTAL_EXECUTION_TIME" ] && [ "$TOTAL_EXECUTION_TIME" != "0.00" ]; then
                echo "| **Total Execution Time** | ${TOTAL_EXECUTION_TIME}s |"
                echo "| **Average Execution Time** | ${AVERAGE_EXECUTION_TIME}s |"
              fi
              echo ""
              echo "**Test completed at:** $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
              # Add score explanation
              echo ""
              echo "### Scoring System"
              echo "- Each model gets 1 point for each test that passes (max 3 points per model)"
              echo "- Collection score: 3 points per model that passes all tests"
              echo "- Perfect score would be: $((TOTAL_MODELS * 3)) points"
            } >> "$GITHUB_STEP_SUMMARY"
          else
            echo "No test results found." >> "$GITHUB_STEP_SUMMARY"
          fi

      - name: Comment on failure
        # Runs only when an earlier step in this job has failed.
        if: failure()
        run: |
          {
            echo "# ❌ BioImage.IO Tests Failed"
            echo ""
            echo "The daily BioImage.IO model testing workflow has failed."
            echo "Please check the logs for more details."
            echo ""
            echo "**Failed at:** $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
          } >> "$GITHUB_STEP_SUMMARY"