Skip to content

BioImage.IO Model Testing #92

BioImage.IO Model Testing

BioImage.IO Model Testing #92

Workflow file for this run

# Workflow that runs the BioImage.IO model tests on a schedule or on demand.
name: BioImage.IO Model Testing

on:
  # Scheduled trigger: every day at 02:00 UTC.
  schedule:
    - cron: "0 2 * * *"
  # Manual trigger; both inputs are optional.
  workflow_dispatch:
    inputs:
      model_ids:
        description: "Specific model IDs to test (comma-separated, leave empty for all)"
        type: string
        required: false
      dry_run:
        description: "Run in dry-run mode (don't update artifacts)"
        type: boolean
        required: false
        default: false
jobs:
  # The only job in this workflow; its steps follow under `steps`.
  test-bioimageio-models:
    # Upper bound for the whole job: 480 minutes (8 hours).
    timeout-minutes: 480
    runs-on: ubuntu-latest
    steps:
# Fetch the repository contents into the runner workspace.
- name: Checkout repository
  uses: actions/checkout@v4
# Provision Python 3.11 on the runner.
- name: Set up Python
  # v5 runs on the Node 20 action runtime; v4 targets Node 16, which GitHub
  # has deprecated. The `python-version` input is unchanged between the two.
  uses: actions/setup-python@v5
  with:
    # Quoted so the version is always read as a string, never as a float.
    python-version: "3.11"
# Persist pip's download cache between runs.
- name: Cache pip dependencies
  # v4 runs on the Node 20 action runtime (v3 targets the deprecated Node 16);
  # the `path`, `key` and `restore-keys` inputs are unchanged.
  uses: actions/cache@v4
  with:
    path: ~/.cache/pip
    # NOTE(review): the key hashes requirements.txt / requirements-dev.txt, but
    # the install step in this workflow installs packages by name rather than
    # from those files, so the key does not change when the installed set
    # changes. Confirm whether the key should track something else.
    key: "${{ runner.os }}-pip-${{ hashFiles('requirements.txt', 'requirements-dev.txt') }}"
    # Fall back to the most recent cache for this OS when the exact key misses.
    restore-keys: |
      ${{ runner.os }}-pip-
# Upgrade pip, then install the packages listed below (no versions are pinned).
- name: Install dependencies
  run: |
    python -m pip install --upgrade pip
    pip install \
      hypha-rpc \
      numpy \
      httpx \
      pydantic
# Create the report directory in the workspace root (no error if it exists);
# this is the same path the artifact upload step reads.
- name: Create test results directory
  run: |
    mkdir -p bioimageio_test_reports
# Run scripts/bioimageio_test_reports.py, optionally restricted to specific
# model IDs and/or in dry-run mode, and fail the step if the script fails.
- name: Run BioImage.IO model tests
  env:
    HYPHA_TOKEN: "${{ secrets.HYPHA_TOKEN }}"
    # Inputs are handed over through the environment instead of being expanded
    # into the script text, so their contents can never be parsed as shell
    # code. Both are empty on scheduled runs, hence the fallbacks.
    MODEL_IDS: "${{ github.event.inputs.model_ids || '' }}"
    DRY_RUN: "${{ github.event.inputs.dry_run || 'false' }}"
  run: |
    cd scripts

    # Build the argument list as an array: every model ID stays its own word
    # without relying on unquoted expansion (which would also glob).
    ARGS=()
    if [ -n "$MODEL_IDS" ]; then
      # Commas become spaces; `read -a` then splits on whitespace.
      read -r -a MODEL_ID_LIST <<< "${MODEL_IDS//,/ }"
      ARGS+=(--model-ids "${MODEL_ID_LIST[@]}")
    fi
    if [ "$DRY_RUN" = "true" ]; then
      ARGS+=(--dry-run)
    fi

    echo "Running: python bioimageio_test_reports.py ${ARGS[*]}"

    # The default Actions shell is `bash -e`, which aborts on the first failing
    # command. Capturing the status with `||` keeps the script alive long
    # enough to print the failure message below.
    EXIT_CODE=0
    python bioimageio_test_reports.py "${ARGS[@]}" || EXIT_CODE=$?
    if [ "$EXIT_CODE" -ne 0 ]; then
      echo "❌ Model testing failed with exit code $EXIT_CODE"
      exit "$EXIT_CODE"
    fi
# Publish the report directory as a build artifact; `always()` makes this run
# even when an earlier step failed.
- name: Upload test results as artifact
  if: ${{ always() }}
  uses: actions/upload-artifact@v4
  with:
    # The run number keeps artifact names distinct across runs.
    name: bioimageio-test-results-${{ github.run_number }}
    path: bioimageio_test_reports/
    # Artifact is kept for 30 days.
    retention-days: 30
# Render a Markdown summary of the test results on the job summary page.
# Runs even when earlier steps failed.
- name: Generate summary report
  if: always()
  run: |
    {
      echo "# BioImage.IO Test Results Summary"
      echo ""
    } >> "$GITHUB_STEP_SUMMARY"

    cd scripts

    # The analysis output is echoed to the log and then evaluated so that the
    # variables used below (TOTAL_MODELS, PASSED_RDF, RDF_RATE, ...) are set.
    ANALYSIS_OUTPUT=$(python3 bioimageio_test_reports.py --analyze-results)
    echo "$ANALYSIS_OUTPUT"
    # NOTE(review): `eval` executes whatever the script prints. Presumably the
    # output is plain KEY=VALUE assignments - confirm that no value can carry
    # text derived from untrusted model metadata.
    eval "$ANALYSIS_OUTPUT"

    # Default to 0 so a missing TOTAL_MODELS takes the "no results" branch
    # instead of raising "integer expression expected".
    if [ "${TOTAL_MODELS:-0}" -gt 0 ]; then
      {
        echo "**Total models tested:** $TOTAL_MODELS"
        echo ""
        echo "| Test Type | Passed | Total | Rate |"
        echo "|-----------|--------|-------|------|"
        echo "| RDF Validation | $PASSED_RDF | $TOTAL_MODELS | ${RDF_RATE}% |"
        echo "| Model Test Run | $PASSED_INFERENCE | $TOTAL_MODELS | ${MODEL_RATE}% |"
        echo "| Reproduce Outputs | $PASSED_REPRODUCE | $TOTAL_MODELS | ${REPRODUCE_RATE}% |"
        echo ""
        echo "## Performance Metrics"
        echo ""
        echo "| Metric | Value |"
        echo "|--------|-------|"
        echo "| **Total Score** | $TOTAL_SCORE |"
        echo "| **Average Score per Model** | ${AVERAGE_SCORE} |"
        # Timing rows are omitted when no execution time was reported.
        if [ -n "$TOTAL_EXECUTION_TIME" ] && [ "$TOTAL_EXECUTION_TIME" != "0.00" ]; then
          echo "| **Total Execution Time** | ${TOTAL_EXECUTION_TIME}s |"
          echo "| **Average Execution Time** | ${AVERAGE_EXECUTION_TIME}s |"
        fi
        echo ""
        echo "**Test completed at:** $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
        echo ""
        echo "### Scoring System"
        echo "- Each model gets 1 point for each test that passes (max 3 points per model)"
        echo "- Collection score: 3 points per model that passes all tests"
        echo "- Perfect score would be: $((TOTAL_MODELS * 3)) points"
      } >> "$GITHUB_STEP_SUMMARY"
    else
      echo "No test results found." >> "$GITHUB_STEP_SUMMARY"
    fi
# When any earlier step has failed, append a failure notice (with a UTC
# timestamp) to the job summary page.
- name: Comment on failure
  if: ${{ failure() }}
  run: |
    cat >> $GITHUB_STEP_SUMMARY <<EOF
    # ❌ BioImage.IO Tests Failed

    The daily BioImage.IO model testing workflow has failed.
    Please check the logs for more details.

    **Failed at:** $(date -u '+%Y-%m-%d %H:%M:%S UTC')
    EOF