# BioImage.IO Model Testing #92
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow that tests BioImage.IO models, either on a daily schedule or on
# demand through a manual dispatch.
name: BioImage.IO Model Testing

on:
  schedule:
    # Run daily at 2:00 AM UTC
    - cron: "0 2 * * *"

  # Manual trigger; both inputs are optional.
  workflow_dispatch:
    inputs:
      model_ids:
        description: "Specific model IDs to test (comma-separated, leave empty for all)"
        required: false
        type: string
      dry_run:
        description: "Run in dry-run mode (don't update artifacts)"
        required: false
        default: false
        type: boolean
jobs:
  # Single job: install the client dependencies, run the model test script,
  # upload the report directory and write a Markdown summary for the run.
  test-bioimageio-models:
    runs-on: ubuntu-latest
    # Allow up to 8 hours for the run.
    timeout-minutes: 480
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Cache pip dependencies
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt', 'requirements-dev.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install hypha-rpc numpy httpx pydantic

      - name: Create test results directory
        run: mkdir -p bioimageio_test_reports

      - name: Run BioImage.IO model tests
        env:
          HYPHA_TOKEN: ${{ secrets.HYPHA_TOKEN }}
          # Inputs are passed through the environment instead of being
          # interpolated into the script text, so their content can never be
          # executed as shell code. Scheduled runs have no inputs, hence the
          # fallbacks.
          MODEL_IDS: ${{ github.event.inputs.model_ids || '' }}
          DRY_RUN: ${{ github.event.inputs.dry_run || 'false' }}
        run: |
          cd scripts

          # Build the argument list as an array so each model ID is passed as
          # exactly one argument (no glob expansion of user-supplied text).
          ARGS=()
          if [ -n "$MODEL_IDS" ]; then
            # Commas become spaces, then the line is split on whitespace.
            read -r -a MODEL_ID_LIST <<< "${MODEL_IDS//,/ }"
            ARGS+=(--model-ids "${MODEL_ID_LIST[@]}")
          fi
          if [ "$DRY_RUN" = "true" ]; then
            ARGS+=(--dry-run)
          fi

          echo "Running: python bioimageio_test_reports.py ${ARGS[*]}"

          # The default shell runs with `-e`, which would abort the step on a
          # non-zero exit before the status could be inspected; `||` keeps the
          # script alive long enough to report the failure.
          EXIT_CODE=0
          python bioimageio_test_reports.py "${ARGS[@]}" || EXIT_CODE=$?
          if [ "$EXIT_CODE" -ne 0 ]; then
            echo "❌ Model testing failed with exit code $EXIT_CODE"
            exit "$EXIT_CODE"
          fi

      - name: Upload test results as artifact
        uses: actions/upload-artifact@v4
        # Upload whatever was produced, even when the tests failed.
        if: always()
        with:
          name: bioimageio-test-results-${{ github.run_number }}
          path: bioimageio_test_reports/
          retention-days: 30

      - name: Generate summary report
        if: always()
        run: |
          {
            echo "# BioImage.IO Test Results Summary"
            echo ""
          } >> "$GITHUB_STEP_SUMMARY"

          cd scripts
          ANALYSIS_OUTPUT=$(python3 bioimageio_test_reports.py --analyze-results)
          echo "$ANALYSIS_OUTPUT"
          # NOTE(review): eval executes whatever --analyze-results prints.
          # Presumably that is a list of KEY=value shell assignments (the
          # variables used below); confirm the script only ever emits trusted,
          # shell-safe output.
          eval "$ANALYSIS_OUTPUT"

          # Default to 0 so a missing TOTAL_MODELS falls through to the
          # "no results" branch without a test(1) error.
          if [ "${TOTAL_MODELS:-0}" -gt 0 ]; then
            {
              echo "**Total models tested:** $TOTAL_MODELS"
              echo ""
              echo "| Test Type | Passed | Total | Rate |"
              echo "|-----------|--------|-------|------|"
              echo "| RDF Validation | $PASSED_RDF | $TOTAL_MODELS | ${RDF_RATE}% |"
              echo "| Model Test Run | $PASSED_INFERENCE | $TOTAL_MODELS | ${MODEL_RATE}% |"
              echo "| Reproduce Outputs | $PASSED_REPRODUCE | $TOTAL_MODELS | ${REPRODUCE_RATE}% |"
              echo ""
              echo "## Performance Metrics"
              echo ""
              echo "| Metric | Value |"
              echo "|--------|-------|"
              echo "| **Total Score** | $TOTAL_SCORE |"
              echo "| **Average Score per Model** | ${AVERAGE_SCORE} |"
              # Timing rows are only shown when a non-zero total was reported.
              if [ -n "$TOTAL_EXECUTION_TIME" ] && [ "$TOTAL_EXECUTION_TIME" != "0.00" ]; then
                echo "| **Total Execution Time** | ${TOTAL_EXECUTION_TIME}s |"
                echo "| **Average Execution Time** | ${AVERAGE_EXECUTION_TIME}s |"
              fi
              echo ""
              echo "**Test completed at:** $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
              echo ""
              echo "### Scoring System"
              echo "- Each model gets 1 point for each test that passes (max 3 points per model)"
              echo "- Collection score: 3 points per model that passes all tests"
              echo "- Perfect score would be: $((TOTAL_MODELS * 3)) points"
            } >> "$GITHUB_STEP_SUMMARY"
          else
            echo "No test results found." >> "$GITHUB_STEP_SUMMARY"
          fi

      - name: Comment on failure
        # Runs only when an earlier step in this job has failed.
        if: failure()
        run: |
          {
            echo "# ❌ BioImage.IO Tests Failed"
            echo ""
            echo "The daily BioImage.IO model testing workflow has failed."
            echo "Please check the logs for more details."
            echo ""
            echo "**Failed at:** $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
          } >> "$GITHUB_STEP_SUMMARY"