Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,271 @@
"""
Django management command to extract assignment dates from modulestore and populate ContentDate table.
"""

import logging
from typing import List

from django.contrib.auth import get_user_model
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from edx_when.api import update_or_create_assignments_due_dates, models as when_models
from opaque_keys import InvalidKeyError
from opaque_keys.edx.keys import CourseKey
from xmodule.modulestore.django import modulestore
from xmodule.modulestore.exceptions import ItemNotFoundError

from lms.djangoapps.courseware.courses import get_course_assignments
from openedx.core.djangoapps.content.course_overviews.models import CourseOverview

log = logging.getLogger(__name__)

User = get_user_model()


class Command(BaseCommand):
    """
    Management command to seed ContentDate table with assignment due dates from modulestore.

    Example usage:

        # Dry run for all courses
        python manage.py lms seed_content_dates --dry-run

        # Seed specific course
        python manage.py lms seed_content_dates --course-id "course-v1:MITx+6.00x+2023_Fall"

        # Seed all courses for specific org
        python manage.py lms seed_content_dates --org "MITx"

        # Force update existing entries
        python manage.py lms seed_content_dates --force-update
    """

    help = "Extract assignment dates from modulestore and populate ContentDate table"

    def add_arguments(self, parser):
        parser.add_argument(
            "--course-id",
            type=str,
            help='Specific course ID to process (e.g., "course-v1:MITx+6.00x+2023_Fall")',
        )
        parser.add_argument("--org", type=str, help="Organization to filter courses by")
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Show what would be processed without making changes",
        )
        parser.add_argument(
            "--force-update",
            action="store_true",
            help="Update existing ContentDate entries (default: skip existing)",
        )
        parser.add_argument(
            "--batch-size",
            type=int,
            default=100,
            help="Number of assignments to process in each batch (default: 100)",
        )

    def handle(self, *args, **options):
        """
        Entry point: resolve the target courses and seed their assignment due dates.

        A failure in one course is logged and skipped so the rest of the run
        continues; any other unexpected failure is re-raised as CommandError.
        """
        self.dry_run = options["dry_run"]  # pylint: disable=attribute-defined-outside-init
        self.force_update = options["force_update"]  # pylint: disable=attribute-defined-outside-init
        self.batch_size = options["batch_size"]  # pylint: disable=attribute-defined-outside-init

        if self.dry_run:
            self.stdout.write(
                self.style.WARNING(
                    "DRY RUN MODE: No changes will be made to the database"
                )
            )

        try:
            course_keys = self._get_course_keys(options)

            total_processed = 0
            total_created = 0
            total_updated = 0
            total_skipped = 0

            for course_key in course_keys:
                self.stdout.write(f"Processing course: {course_key}")

                try:
                    processed, created, updated, skipped = self._process_course(
                        course_key
                    )
                    total_processed += processed
                    total_created += created
                    total_updated += updated
                    total_skipped += skipped

                    self.stdout.write(
                        f" Course {course_key}: {processed} assignments processed, "
                        f"{created} created, {updated} updated, {skipped} skipped"
                    )

                except Exception as e:  # pylint: disable=broad-exception-caught
                    # One bad course must not abort the whole run.
                    self.stdout.write(
                        self.style.ERROR(
                            f"Error processing course {course_key}: {str(e)}"
                        )
                    )
                    log.exception("Error processing course %s", course_key)
                    continue

            self.stdout.write(
                self.style.SUCCESS(
                    f"\nSUMMARY:\n"
                    f"Total assignments processed: {total_processed}\n"
                    f"Total created: {total_created}\n"
                    f"Total updated: {total_updated}\n"
                    f"Total skipped: {total_skipped}"
                )
            )

        except CommandError:
            raise
        except Exception as e:
            raise CommandError(f"Command failed: {str(e)}") from e

    def _get_course_keys(self, options) -> List[CourseKey]:
        """
        Get list of course keys to process based on command options.

        Raises CommandError for a malformed or unknown --course-id, or when
        the --org filter (or the whole site) matches no courses.
        """
        course_keys = []

        if options["course_id"]:
            # Keep the try body minimal: only the parse can raise InvalidKeyError.
            try:
                course_key = CourseKey.from_string(options["course_id"])
            except InvalidKeyError as e:
                raise CommandError(f"Invalid course ID format: {options['course_id']}") from e

            # Fail fast if the course is not known to CourseOverview.
            if not CourseOverview.objects.filter(id=course_key).exists():
                raise CommandError(f"Course not found: {options['course_id']}")
            course_keys.append(course_key)

        else:
            queryset = CourseOverview.objects.all()
            if options["org"]:
                queryset = queryset.filter(org=options["org"])

            course_keys = [overview.id for overview in queryset]

        if not course_keys:
            filter_msg = f" for org '{options['org']}'" if options["org"] else ""
            raise CommandError(f"No courses found{filter_msg}")

        return course_keys

    def _process_course(self, course_key: CourseKey) -> tuple[int, int, int, int]:
        """
        Process a single course and return (processed, created, updated, skipped) counts.
        """
        # NOTE(review): modulestore() is fetched once per course; if profiling
        # shows this is costly, fetch it once in handle() and pass it in.
        store = modulestore()

        try:
            course = store.get_course(course_key)
            if not course:
                raise ItemNotFoundError(
                    f"Course not found in modulestore: {course_key}"
                )
        except ItemNotFoundError:
            log.warning("Course not found in modulestore: %s", course_key)
            return (0, 0, 0, 0)

        # NOTE(review): picking an arbitrary staff user is a stopgap so the
        # assignments API can run its access checks. A dedicated service user
        # would be more robust — confirm before relying on this in production.
        staff_user = User.objects.filter(is_staff=True).first()
        if not staff_user:
            log.warning("No staff user found; cannot load assignments")
            return (0, 0, 0, 0)
        assignments = get_course_assignments(course_key, staff_user)

        if not assignments:
            log.info("No assignments with due dates found in course: %s", course_key)
            return (0, 0, 0, 0)

        processed = len(assignments)
        created = 0
        updated = 0
        skipped = 0

        if self.dry_run:
            self.stdout.write(f" Would process {processed} assignments")
            for assignment in assignments[:5]:  # Show first 5 as preview
                self.stdout.write(f" - {assignment.title} (due: {assignment.date})")
            if len(assignments) > 5:
                self.stdout.write(f" ... and {len(assignments) - 5} more")
            return processed, 0, 0, 0

        # TODO(review): edx-when should expose a public API for this query so
        # we do not reach into its models directly.
        existing_due_locations: set = set(
            when_models.ContentDate.objects.filter(
                course_id=course_key, field="due"
            ).values_list("location", flat=True)
        )

        with transaction.atomic():
            for i in range(0, len(assignments), self.batch_size):
                batch = assignments[i : i + self.batch_size]
                batch_created, batch_updated, batch_skipped = (
                    self._process_assignment_batch(
                        course_key, batch, existing_due_locations
                    )
                )
                created += batch_created
                updated += batch_updated
                skipped += batch_skipped

        return processed, created, updated, skipped

    def _process_assignment_batch(
        self,
        course_key: CourseKey,
        assignments: list,
        existing_due_locations: set,
    ) -> tuple[int, int, int]:
        """
        Process a batch of assignments and return (created, updated, skipped) counts.

        existing_due_locations is updated in place when new ContentDate rows are created.
        """
        created = 0
        updated = 0
        skipped = 0

        # First pass: decide what actually needs writing.
        to_save = []
        for assignment in assignments:
            self.stdout.write(
                f"Processing assignment: {assignment.block_key}/{assignment.assignment_type} (due: {assignment.date})"
            )
            if assignment.block_key in existing_due_locations and not self.force_update:
                skipped += 1
                log.info(
                    "Skipping existing ContentDate for %s in course %s",
                    assignment.title,
                    course_key,
                )
                continue
            to_save.append(assignment)

        # Second pass: persist, then count created vs. updated based on whether
        # a "due" ContentDate already existed before this run.
        for assignment in self._save_due_dates(course_key, to_save):
            if assignment.block_key in existing_due_locations:
                updated += 1
                log.info(
                    "Updated ContentDate for %s in course %s",
                    assignment.title,
                    course_key,
                )
            else:
                existing_due_locations.add(assignment.block_key)
                created += 1
                log.info(
                    "Created ContentDate for %s in course %s",
                    assignment.title,
                    course_key,
                )

        return created, updated, skipped

    def _save_due_dates(self, course_key: CourseKey, assignments: list) -> list:
        """
        Persist due dates for ``assignments``; return the ones saved successfully.

        Uses the batch capability of ``update_or_create_assignments_due_dates``
        first; if the batch write fails, retries one assignment at a time so a
        single bad row does not discard the rest. Every write runs inside a
        nested ``transaction.atomic()`` (a savepoint): without one, a database
        error would mark the caller's outer atomic block as broken, and every
        subsequent write in the transaction would raise
        TransactionManagementError.
        """
        if not assignments:
            return []

        try:
            with transaction.atomic():
                update_or_create_assignments_due_dates(course_key, assignments)
            return list(assignments)
        except Exception:  # pylint: disable=broad-exception-caught
            log.exception(
                "Batch due-date write failed for course %s; retrying per assignment",
                course_key,
            )

        saved = []
        for assignment in assignments:
            try:
                with transaction.atomic():
                    update_or_create_assignments_due_dates(course_key, [assignment])
            except Exception as e:  # pylint: disable=broad-exception-caught
                log.error(
                    "Failed to process assignment %s in course %s: %s",
                    assignment.title,
                    course_key,
                    str(e),
                )
                continue
            saved.append(assignment)
        return saved