Skip to content
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8d6ae67
Store Github branch with challenge data
Zahed-Riyaz Jul 9, 2025
9164bf0
Modify backend to store github branch
Zahed-Riyaz Jul 9, 2025
3cf0b12
Merge branch 'master' into starters-versions
Zahed-Riyaz Jul 9, 2025
2f6e1a3
Handle empty branches
Zahed-Riyaz Jul 9, 2025
69b9535
Merge migrations
Zahed-Riyaz Jul 9, 2025
4bff095
Update seed.py
Zahed-Riyaz Jul 9, 2025
75a7868
Add GitHub branch versioning support for multi-version challenges
Zahed-Riyaz Jul 9, 2025
e2d7f53
Revert unnecessary changes
Zahed-Riyaz Jul 9, 2025
f6a06d7
Fix tests
Zahed-Riyaz Jul 9, 2025
3287cc9
Update Github branch var
Zahed-Riyaz Jul 10, 2025
34cbec3
Merge branch 'master' into starters-versions
Zahed-Riyaz Jul 10, 2025
a1bd1ea
Fix failing tests
Zahed-Riyaz Jul 10, 2025
6bbcea7
Pass flake8 and pylint tests
Zahed-Riyaz Jul 10, 2025
4188b9c
Add github_branch field to backend
Zahed-Riyaz Jul 16, 2025
831fec2
Add scripts for populating field
Zahed-Riyaz Jul 16, 2025
1cb4fcf
Merge branch 'master' into starters-versions
Zahed-Riyaz Jul 16, 2025
8af18d5
Allow alphanumeric values for branch name
Zahed-Riyaz Jul 17, 2025
350936f
Remove duplicate params
Zahed-Riyaz Jul 17, 2025
fb379aa
Update migrations and fallback logic
Zahed-Riyaz Jul 17, 2025
8a8e2d8
Update branch validation logic
Zahed-Riyaz Jul 17, 2025
1355c62
Merge branch 'master' into starters-versions
Zahed-Riyaz Jul 17, 2025
d136023
Merge branch 'master' into starters-versions
Zahed-Riyaz Jul 17, 2025
c8bd7c9
Reformat for quality checks
Zahed-Riyaz Jul 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion apps/challenges/challenge_config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ def get_value_from_field(data, base_location, field_name):
"challenge_metadata_schema_errors": "ERROR: Unable to serialize the challenge because of the following errors: {}.",
"evaluation_script_not_zip": "ERROR: Please pass in a zip file as evaluation script. If using the `evaluation_script` directory (recommended), it should be `evaluation_script.zip`.",
"docker_based_challenge": "ERROR: New Docker based challenges are not supported starting March 15, 2025.",
"invalid_github_branch_format": "ERROR: GitHub branch name '{branch}' is invalid. It must match the pattern 'challenge-<year>-<version>' (e.g., challenge-2024-1, challenge-2060-v2).",
}


Expand Down Expand Up @@ -364,6 +365,23 @@ def __init__(
self.phase_ids = []
self.leaderboard_ids = []

def validate_github_branch_format(self):
    """
    Validate the GitHub branch name supplied with the request.

    The name is read from ``GITHUB_BRANCH_NAME`` in ``self.request.data``,
    falling back to ``BRANCH_NAME``. A missing or empty value is treated as
    the default branch ``challenge``.

    Accepted values are the bare default ``challenge`` and versioned names
    of the form ``challenge-<year>-<version>`` (four digits, then one or
    more alphanumeric characters). For anything else the
    ``invalid_github_branch_format`` message is appended to
    ``self.error_messages``. Nothing is returned.
    """
    default_branch = "challenge"
    branch = (
        self.request.data.get("GITHUB_BRANCH_NAME")
        or self.request.data.get("BRANCH_NAME")
        or default_branch
    )

    # Review question: "Will this also be okay with just 'challenge'
    # (default) if passed?" -- the default has no year/version suffix, so it
    # must be accepted explicitly; otherwise every request that omits the
    # branch would be rejected by the pattern below.
    if branch == default_branch:
        return

    pattern = r"^challenge-\d{4}-[a-zA-Z0-9]+$"
    # fullmatch (unlike match + "$") does not tolerate a trailing newline;
    # the isinstance guard reports non-string values instead of raising.
    if not isinstance(branch, str) or not re.fullmatch(pattern, branch):
        self.error_messages.append(
            self.error_messages_dict["invalid_github_branch_format"].format(
                branch=branch
            )
        )

def read_and_validate_yaml(self):
if not self.yaml_file_count:
message = self.error_messages_dict.get("no_yaml_file")
Expand Down Expand Up @@ -587,7 +605,8 @@ def validate_serializer(self):
"GITHUB_REPOSITORY"
],
"github_branch": self.request.data.get(
"GITHUB_BRANCH_NAME", ""
"GITHUB_BRANCH_NAME", "challenge"

),
},
)
Expand Down Expand Up @@ -1134,6 +1153,9 @@ def validate_challenge_config_util(

val_config_util.validate_serializer()

# Add branch format validation
val_config_util.validate_github_branch_format()

# Get existing config IDs for leaderboards and dataset splits
if current_challenge:
current_challenge_phases = ChallengePhase.objects.filter(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,32 @@
from django.db import migrations, models


def populate_github_branch_default(apps, schema_editor):
    """
    Forward data step: give challenges without a branch the value "challenge".

    Only challenges whose github_repository is neither NULL nor the empty
    string are touched. Rows with a NULL github_branch are updated first,
    then rows whose github_branch is the empty string.
    """
    Challenge = apps.get_model("challenges", "Challenge")

    # Every challenge that is linked to a GitHub repository.
    linked_to_repo = Challenge.objects.filter(
        github_repository__isnull=False
    ).exclude(github_repository="")

    # One UPDATE per "missing branch" condition: NULL, then empty string.
    missing_branch_lookups = (
        {"github_branch__isnull": True},
        {"github_branch": ""},
    )
    for lookup in missing_branch_lookups:
        linked_to_repo.filter(**lookup).update(github_branch="challenge")


def reverse_populate_github_branch_default(apps, schema_editor):
    """
    Reverse data step: reset github_branch from "challenge" to "".

    Applies to every challenge that has a non-empty github_repository and
    whose github_branch equals "challenge". The query matches on the value
    only, so it does not distinguish rows written by the forward step from
    rows that already held "challenge".
    """
    Challenge = apps.get_model("challenges", "Challenge")

    # Every challenge that is linked to a GitHub repository.
    linked_to_repo = Challenge.objects.filter(
        github_repository__isnull=False
    ).exclude(github_repository="")

    on_default_branch = linked_to_repo.filter(github_branch="challenge")
    on_default_branch.update(github_branch="")

def fix_duplicate_github_fields(apps, schema_editor):
"""
No data migration needed since we're using a partial unique constraint.
Expand All @@ -28,9 +54,14 @@ class Migration(migrations.Migration):
model_name="challenge",
name="github_branch",
field=models.CharField(
blank=True, default="", max_length=200, null=True
blank=True, default="challenge", max_length=200, null=True
),
),
# Data migration to populate existing records
migrations.RunPython(
populate_github_branch_default,
reverse_populate_github_branch_default,
),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we running populate_github_branch_default and then reversing it?

Copy link
Contributor Author

@Zahed-Riyaz Zahed-Riyaz Jul 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The migration was originally written to handle the population itself (with a reverse step so that the script could be used instead), but now that a dedicated script exists for it, it is probably best to remove the data step from the migration entirely.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the best way to handle this scenario?

migrations.RunPython(
fix_duplicate_github_fields,
reverse_fix_duplicate_github_fields,
Expand Down
2 changes: 1 addition & 1 deletion apps/challenges/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def __init__(self, *args, **kwargs):
)
# The github branch name used to create/update the challenge
# The default must be the string "challenge": a bare `challenge` is an
# undefined name and raises NameError when the model module is imported.
github_branch = models.CharField(
    max_length=200, null=True, blank=True, default="challenge"
)
# The number of vCPU for a Fargate worker for the challenge. Default value
# is 0.25 vCPU.
Expand Down
7 changes: 5 additions & 2 deletions apps/challenges/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3898,8 +3898,9 @@ def create_or_update_github_challenge(request, challenge_host_team_pk):
return Response(response_data, status=status.HTTP_406_NOT_ACCEPTABLE)

# Get branch name with default fallback
github_branch = request.data.get("GITHUB_BRANCH_NAME", "")

github_branch = request.data.get("GITHUB_BRANCH_NAME") or request.data.get(
"BRANCH_NAME", "challenge"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please add a reference as to where these keys are being populated from?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need a fallback? Should we not always expect a branch? Since the challenge repo will be pushing at some branch?

Copy link
Contributor Author

@Zahed-Riyaz Zahed-Riyaz Jul 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't believe there's a reference of the keys in EvalAI documentation, but originally I used GITHUB_REF_NAME and was instructed to change it to GITHUB_BRANCH_NAME during PR review in #4737.

I updated the empty string default to "challenge" now for backward compatibility so that old existing challenges (I believe in local servers) will now have a github_branch versus an empty string. Any challenge on the platform would naturally always expect a github_branch and will store one.
Modern workflows should always specify the branch; if older scripts or code for whatever reason don't specify github_branch, they will get the default fallback for good measure.

Screenshot 2025-07-17 at 2 45 51 PM

)
challenge_queryset = Challenge.objects.filter(
github_repository=request.data["GITHUB_REPOSITORY"],
github_branch=github_branch,
Expand Down Expand Up @@ -4288,6 +4289,8 @@ def create_or_update_github_challenge(request, challenge_host_team_pk):
"challenge_evaluation_script_file"
],
"worker_image_url": worker_image_url,
"github_repository": request.data["GITHUB_REPOSITORY"],
"github_branch": github_branch,
},
)
if serializer.is_valid():
Expand Down
79 changes: 79 additions & 0 deletions scripts/migration/populate_github_branch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env python
# Command to run: python manage.py shell < scripts/migration/populate_github_branch.py
"""
Populate existing challenges with github_branch="challenge" for backward compatibility.
This script should be run after the migration to ensure all existing challenges
have the github_branch field populated with the default value.
"""

import traceback

from challenges.models import Challenge
from django.db import models


def _challenges_missing_github_branch():
    """Return challenges with a non-empty github_repository but a NULL or empty github_branch."""
    return (
        Challenge.objects.filter(github_repository__isnull=False)
        .exclude(github_repository="")
        .filter(
            models.Q(github_branch__isnull=True) | models.Q(github_branch="")
        )
    )


def populate_github_branch_fields():
    """
    Populate existing challenges with empty github_branch fields to use "challenge" as default.

    Prints progress to stdout: the number of rows found, the number updated,
    a check that no matching rows remain, and up to five of the rows that
    were updated. Returns None; returns early when nothing needs updating.

    NOTE(review): a reviewer pointed out that this script covers the same
    back-fill as the populate/reverse-populate functions in the migration
    and asked why both are needed -- only one of the two should be kept.
    """
    print("Starting github_branch field population...")

    challenges_to_update = _challenges_missing_github_branch()
    count = challenges_to_update.count()

    if count == 0:
        print("No challenges found that need github_branch population.")
        return

    print(f"Found {count} challenges that need github_branch population.")

    # Record a few affected ids *before* updating: once updated, the rows no
    # longer match the "missing branch" filter, and the sample printed below
    # should show rows this run actually changed.
    sample_ids = list(
        challenges_to_update.order_by("id").values_list("id", flat=True)[:5]
    )

    updated_count = challenges_to_update.update(github_branch="challenge")

    print(
        f"Successfully updated {updated_count} challenges with github_branch='challenge'"
    )

    # Re-run the same filter to confirm nothing was left behind.
    remaining_empty = _challenges_missing_github_branch().count()

    if remaining_empty == 0:
        print("✅ All challenges now have github_branch populated!")
    else:
        print(
            f"⚠️ Warning: {remaining_empty} challenges still have empty github_branch fields"
        )

    sample_challenges = (
        Challenge.objects.filter(id__in=sample_ids)
        .order_by("id")
        .values("id", "title", "github_repository", "github_branch")
    )

    print("\nSample updated challenges:")
    for challenge in sample_challenges:
        print(
            f" ID: {challenge['id']}, Title: {challenge['title']}, "
            f"Repo: {challenge['github_repository']}, Branch: {challenge['github_branch']}"
        )


# Script entry point: run the back-fill and report the outcome on stdout.
try:
    populate_github_branch_fields()
except Exception as e:
    # Top-level boundary of a one-off script: report the failure with its
    # traceback instead of letting the shell session die silently.
    print(f"\n❌ Error occurred: {e}")
    # print_exc() writes the traceback itself and returns None, so wrapping
    # it in print() only emitted an extra "None" line.
    traceback.print_exc()
else:
    print("\n✅ Script completed successfully!")
2 changes: 1 addition & 1 deletion scripts/seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def create_challenge(
featured=is_featured,
image=image_file,
github_repository=f"evalai-examples/{slug}",
github_branch="main",
github_branch="challenge",
)
challenge.save()

Expand Down
5 changes: 2 additions & 3 deletions tests/unit/challenges/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -6020,14 +6020,13 @@ def test_create_challenge_using_github_without_branch_name(self):
self.assertEqual(response.status_code, 201)
self.assertEqual(response.json(), expected)

# Verify github_branch defaults to empty string when not provided
# Verify github_branch defaults to "challenge" when not provided
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please also add test cases for when the branch name is "challenge", when it is "xyzabc", and when it is "challenge-2025-v2" (or whatever format you have chosen).

challenge = Challenge.objects.first()
self.assertEqual(
challenge.github_repository,
"https://github.com/yourusername/repository",
)
self.assertEqual(challenge.github_branch, "")

self.assertEqual(challenge.github_branch, "challenge")

class ValidateChallengeTest(APITestCase):
def setUp(self):
Expand Down