# BioImage.IO Model Testing #20
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: BioImage.IO Model Testing

# Triggers:
#   * schedule          - nightly run.
#   * pull_request      - runs when a PR is opened, updated or reopened.
#   * workflow_dispatch - manual run; the inputs below are only populated for
#                         this trigger and are empty for the other two.
on:
  schedule:
    # Run daily at 2:00 AM UTC
    - cron: '0 2 * * *'
  pull_request:
    # Run on pull requests to test changes
    types: [opened, synchronize, reopened]
  workflow_dispatch:
    inputs:
      # Kept as a string choice ('true'/'false'); the job compares it
      # against the literal string "false".
      skip_exists:
        description: 'Skip models that already have test results'
        required: false
        default: 'true'
        type: choice
        options:
          - 'true'
          - 'false'
      # Comma-separated list; an empty value means "all models".
      model_ids:
        description: 'Specific model IDs to test (comma-separated, leave empty for all)'
        required: false
        type: string
      # Boolean input; the job compares it against the string "true".
      dry_run:
        description: "Run in dry-run mode (don't update artifacts)"
        required: false
        default: false
        type: boolean
jobs:
  # Single job: installs the client dependencies, runs the model test script
  # in scripts/, uploads the report directory and writes a step summary.
  test-bioimageio-models:
    runs-on: ubuntu-latest
    # 1 hour for PRs, 8 hours for scheduled/manual runs
    timeout-minutes: ${{ github.event_name == 'pull_request' && 60 || 480 }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is never parsed as a float.
          python-version: '3.11'

      # Cache pip's download cache. The key is derived from the requirements
      # files; restore-keys falls back to the most recent cache for this OS.
      - name: Cache pip dependencies
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt', 'requirements-dev.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install hypha-rpc numpy httpx pydantic

      - name: Create test results directory
        run: mkdir -p bioimageio_test_reports

      # Builds the CLI arguments for bioimageio_test_reports.py from the
      # triggering event and the (manual-run only) workflow inputs.
      - name: Run BioImage.IO model tests
        env:
          HYPHA_TOKEN: ${{ secrets.HYPHA_TOKEN }}
          # User-controlled inputs are handed over as environment variables
          # instead of being expanded into the script text, so their content
          # can never be interpreted as shell code.
          WF_SKIP_EXISTS: ${{ github.event.inputs.skip_exists }}
          WF_MODEL_IDS: ${{ github.event.inputs.model_ids }}
          WF_DRY_RUN: ${{ github.event.inputs.dry_run }}
        run: |
          cd scripts

          # Arguments are collected in an array so each one reaches Python as
          # exactly one argv entry (no word-splitting or glob expansion).
          args=()

          # Handle skip_exists argument (anything but "false" keeps skipping)
          if [ "$WF_SKIP_EXISTS" = "false" ]; then
            args+=(--no-skip-exists)
          else
            args+=(--skip-exists)
          fi

          # Handle model_ids argument: commas become spaces, then the value is
          # split on whitespace into individual IDs.
          if [ -n "$WF_MODEL_IDS" ]; then
            read -r -a model_ids <<< "${WF_MODEL_IDS//,/ }"
            # Guard against input that contained only separators.
            if [ "${#model_ids[@]}" -gt 0 ]; then
              args+=(--model-ids "${model_ids[@]}")
            fi
          fi

          # Handle dry_run argument
          if [ "$WF_DRY_RUN" = "true" ]; then
            args+=(--dry-run)
          fi

          # For pull requests, always run in dry-run mode to avoid updating artifacts
          if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
            args+=(--dry-run --skip-exists)
            echo "Running in dry-run mode for pull request"

            # For PR testing, limit to a small subset of models for faster feedback.
            # Workflow inputs are empty on pull_request events, so this branch
            # is the one taken for PRs.
            if [ -z "$WF_MODEL_IDS" ]; then
              echo "Using limited model set for pull request testing"
              # You can modify this list to include specific models for PR testing
              args+=(--model-ids affable-shark chatty-frog zealous-oxen)
            fi
          fi

          # For scheduled runs, always use skip-exists to avoid re-testing.
          # This deliberately replaces everything collected above.
          if [ "$GITHUB_EVENT_NAME" = "schedule" ]; then
            args=(--skip-exists)
          fi

          echo "Running: python bioimageio_test_reports.py ${args[*]}"
          python bioimageio_test_reports.py "${args[@]}"

      # Runs even when the test step failed so partial reports are kept.
      - name: Upload test results as artifact
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: bioimageio-test-results-${{ github.run_number }}
          path: bioimageio_test_reports/
          retention-days: 30

      # Renders a Markdown summary from the variables printed by
      # `bioimageio_test_reports.py --analyze-results`.
      - name: Generate summary report
        if: always()
        run: |
          {
            echo "# BioImage.IO Test Results Summary"
            echo ""

            # Add dry-run notice for pull requests
            if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then
              echo "🧪 **Pull Request Test Mode**"
              echo "- Running in DRY-RUN mode (no artifacts updated)"
              echo "- Testing limited model subset for faster feedback"
              echo "- Using existing test results where available"
              echo ""
            fi
          } >> "$GITHUB_STEP_SUMMARY"

          # Use the analysis function to get statistics
          cd scripts
          ANALYSIS_OUTPUT=$(python3 bioimageio_test_reports.py --analyze-results)
          echo "$ANALYSIS_OUTPUT"

          # Parse the analysis output.
          # NOTE(review): eval executes whatever the script prints; this
          # presumably expects plain NAME=value lines - confirm the script
          # never echoes untrusted model metadata into this output.
          eval "$ANALYSIS_OUTPUT"

          # Default to 0 so a missing TOTAL_MODELS selects the "no results"
          # branch without a shell error.
          if [ "${TOTAL_MODELS:-0}" -gt 0 ]; then
            {
              echo "**Total models tested:** $TOTAL_MODELS"
              echo ""

              # Main results table
              echo "| Test Type | Passed | Total | Rate |"
              echo "|-----------|--------|-------|------|"
              echo "| RDF Validation | $PASSED_RDF | $TOTAL_MODELS | ${RDF_RATE}% |"
              echo "| Model Test Run | $PASSED_MODEL | $TOTAL_MODELS | ${MODEL_RATE}% |"
              echo "| Reproduce Outputs | $PASSED_REPRODUCE | $TOTAL_MODELS | ${REPRODUCE_RATE}% |"
              echo ""

              # Score and performance metrics
              echo "## Performance Metrics"
              echo ""
              echo "| Metric | Value |"
              echo "|--------|-------|"
              echo "| **Total Score** | $TOTAL_SCORE |"
              echo "| **Average Score per Model** | ${AVERAGE_SCORE} |"

              # Add execution time info if available
              if [ -n "$TOTAL_EXECUTION_TIME" ] && [ "$TOTAL_EXECUTION_TIME" != "0.00" ]; then
                echo "| **Total Execution Time** | ${TOTAL_EXECUTION_TIME}s |"
                echo "| **Average Execution Time** | ${AVERAGE_EXECUTION_TIME}s |"
              fi

              echo ""
              echo "**Test completed at:** $(date -u '+%Y-%m-%d %H:%M:%S UTC')"

              # Add score explanation
              echo ""
              echo "### Scoring System"
              echo "- Each model gets 1 point for each test that passes (max 3 points per model)"
              echo "- Collection score: 3 points per model that passes all tests"
              echo "- Perfect score would be: $((TOTAL_MODELS * 3)) points"
            } >> "$GITHUB_STEP_SUMMARY"
          else
            echo "No test results found." >> "$GITHUB_STEP_SUMMARY"
          fi

      # Only runs when an earlier step in this job has failed; appends a
      # failure notice to the step summary.
      - name: Comment on failure
        if: failure()
        run: |
          {
            echo "# ❌ BioImage.IO Tests Failed"
            echo ""
            echo "The daily BioImage.IO model testing workflow has failed."
            echo "Please check the logs for more details."
            echo ""
            echo "**Failed at:** $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
          } >> "$GITHUB_STEP_SUMMARY"