RolnickLab
diff --git a/ami/jobs/admin.py
Lines changed: 12 additions & 5 deletions b/ami/jobs/admin.py
Lines changed: 12 additions & 5 deletions
diff --git a/ami/jobs/migrations/0011_job_job_type_key.py
Lines changed: 42 additions & 0 deletions b/ami/jobs/migrations/0011_job_job_type_key.py
Lines changed: 42 additions & 0 deletions
diff --git a/ami/jobs/migrations/0012_alter_job_limit.py
Lines changed: 23 additions & 0 deletions b/ami/jobs/migrations/0012_alter_job_limit.py
Lines changed: 23 additions & 0 deletions
diff --git a/ami/jobs/models.py
Lines changed: 135 additions & 33 deletions b/ami/jobs/models.py
Lines changed: 135 additions & 33 deletions
@@ -4,7 +4,7 @@
 
 from ami.main.admin import AdminBase
 
-from .models import Job
+from .models import Job, get_job_type_by_inferred_key
 
 
 @admin.register(Job)
@@ -19,7 +19,8 @@ class JobAdmin(AdminBase):
         "started_at",
         "finished_at",
         "duration",
-        "get_job_type_display",
+        "job_type_key",
+        "inferred_job_type",
     )
 
     @admin.action()
@@ -28,9 +29,15 @@ def enqueue_jobs(self, request: HttpRequest, queryset: QuerySet[Job]) -> None:
             job.enqueue()
         self.message_user(request, f"Queued {queryset.count()} job(s).")
 
-    @admin.display(description="Job Type")
-    def get_job_type_display(self, obj: Job) -> str:
-        return obj.job_type().name
+    @admin.display(description="Inferred Job Type")
+    def inferred_job_type(self, obj: Job) -> str:
+        """
+        Admin column showing the name of the job type inferred from the job's attributes.
+
+        @TODO Remove this after running migration 0011_job_job_type_key.py and troubleshooting.
+        """
+        inferred = get_job_type_by_inferred_key(obj)
+        if not inferred:
+            return "Could not infer"
+        return inferred.name
 
     actions = [enqueue_jobs]
 
 
@@ -0,0 +1,42 @@
+# Generated by Django 4.2.10 on 2024-11-11 15:17
+
+from django.db import migrations, models
+
+
+# Add method to set job_type_key based on inferred job type
+def set_job_type_key(apps, schema_editor):
+    """
+    Backfill `job_type_key` on every existing job using the inferred job type.
+
+    Jobs whose type cannot be inferred are given the key of UnknownJobType.
+    """
+    from ami.jobs.models import UnknownJobType, get_job_type_by_inferred_key
+
+    Job = apps.get_model("jobs", "Job")
+    # Stream the rows instead of caching the whole table, and write only the
+    # new column so that no other field of the job is rewritten by the backfill.
+    for job in Job.objects.all().iterator():
+        inferred_type = get_job_type_by_inferred_key(job)
+        job.job_type_key = inferred_type.key if inferred_type else UnknownJobType.key
+        job.save(update_fields=["job_type_key"])
+
+
+class Migration(migrations.Migration):
+    """Add the `job_type_key` field to Job, then backfill it for existing rows."""
+
+    dependencies = [
+        ("jobs", "0010_job_limit_job_shuffle"),
+    ]
+
+    operations = [
+        # Schema change: add the column with "unknown" as its default value.
+        migrations.AddField(
+            model_name="job",
+            name="job_type_key",
+            field=models.CharField(
+                choices=[
+                    ("ml", "ML pipeline"),
+                    ("populate_captures_collection", "Populate captures collection"),
+                    ("data_storage_sync", "Data storage sync"),
+                    ("unknown", "Unknown"),
+                ],
+                default="unknown",
+                max_length=255,
+                verbose_name="Job Type",
+            ),
+        ),
+        # Data change: backfill via set_job_type_key; reversing this step is a no-op.
+        migrations.RunPython(set_job_type_key, reverse_code=migrations.RunPython.noop),
+    ]
@@ -0,0 +1,23 @@
+# Generated by Django 4.2.10 on 2024-11-11 17:42
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Alter Job.limit to be a nullable, optional integer that defaults to None."""
+
+    dependencies = [
+        ("jobs", "0011_job_job_type_key"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="job",
+            name="limit",
+            field=models.IntegerField(
+                blank=True,
+                default=None,
+                help_text="Limit the number of images to process",
+                null=True,
+                verbose_name="Limit",
+            ),
+        ),
+    ]
@@ -116,10 +116,11 @@ class JobProgress(pydantic.BaseModel):
     logs: list[str] = []
 
     def get_stage_key(self, name: str) -> str:
+        """Generate a key for a stage or param by slugifying its name."""
         return python_slugify(name)
 
-    def add_stage(self, name: str) -> JobProgressStageDetail:
-        key = self.get_stage_key(name)
+    def add_stage(self, name: str, key: str | None = None) -> JobProgressStageDetail:
+        key = key or self.get_stage_key(name)
         try:
             return self.get_stage(key)
         except ValueError:
@@ -188,6 +189,16 @@ def update_stage(self, stage_key_or_name: str, **stage_parameters) -> JobProgres
                     self.add_or_update_stage_param(stage_key, k, v)
             return stage
 
+    def reset(self, status: JobState = JobState.CREATED):
+        """
+        Zero the progress of the summary and of every stage, assigning each the given status.
+        """
+        for tracked in (self.summary, *self.stages):
+            tracked.progress = 0
+            tracked.status = status
+
     class Config:
         use_enum_values = True
         as_dict = True
@@ -265,6 +276,12 @@ def emit(self, record):
 
 @dataclass
 class JobType:
+    """
+    The run method of a job is specific to the job type.
+
+    Job types must be defined as classes because they define code, not just configuration.
+    """
+
     name: str
     key: str
 
@@ -273,10 +290,7 @@ def run(cls, job: "Job"):
         """
         Execute the run function specific to this job type.
         """
-        pass
-
-
-AnyJobType = typing.TypeVar("AnyJobType", bound=JobType)
+        raise NotImplementedError("Job type has not implemented the run method")
 
 
 class MLJob(JobType):
@@ -411,14 +425,6 @@ class DataStorageSyncJob(JobType):
     name = "Data storage sync"
     key = "data_storage_sync"
 
-    @classmethod
-    def setup(cls, job: "Job", save=True):
-        job.progress = job.progress or default_job_progress
-        job.progress.add_stage(name=cls.name)
-
-        if save:
-            job.save()
-
     @classmethod
     def run(cls, job: "Job"):
         """
@@ -427,7 +433,8 @@ def run(cls, job: "Job"):
         This is meant to be called by an async task, not directly.
         """
 
-        job.progress.add_stage_param(cls.key, "Total Files", "")
+        job.progress.add_stage(cls.name)
+        job.progress.add_stage_param(cls.key, "Total files", "")
         job.update_status(JobState.STARTED)
         job.started_at = datetime.datetime.now()
         job.finished_at = None
@@ -461,6 +468,62 @@ def run(cls, job: "Job"):
         job.save()
 
 
+class SourceImageCollectionPopulateJob(JobType):
+    """
+    Job type that populates the job's source image collection with captures.
+    """
+
+    name = "Populate captures collection"
+    key = "populate_captures_collection"
+
+    @classmethod
+    def run(cls, job: "Job"):
+        """
+        Run the populate source image collection job.
+
+        This is meant to be called by an async task, not directly.
+
+        The job is marked as failed when it has no source image collection.
+        Otherwise the collection is populated and the number of captures it
+        contains afterwards is recorded on the job's stage.
+        """
+        job.progress.add_stage(cls.name, key=cls.key)
+        job.progress.add_stage_param(cls.key, "Captures added", "")
+        # Mark the job as started exactly once, so that started_at reflects
+        # the moment the run actually began.
+        job.update_status(JobState.STARTED)
+        job.started_at = datetime.datetime.now()
+        job.finished_at = None
+        job.save()
+
+        if not job.source_image_collection:
+            job.logger.error("No source image collection provided")
+            job.update_status(JobState.FAILURE)
+            job.finished_at = datetime.datetime.now()
+            job.save()
+            return
+
+        job.logger.info(f"Populating source image collection {job.source_image_collection}")
+        job.progress.update_stage(
+            cls.key,
+            status=JobState.STARTED,
+            progress=0.10,
+            captures_added=0,
+        )
+        job.update_progress(save=True)
+
+        job.source_image_collection.populate_sample(job=job)
+        job.logger.info(f"Finished populating source image collection {job.source_image_collection}")
+        job.save()
+
+        # This is the total number of images in the collection after populating.
+        captures_added = job.source_image_collection.images.count()
+        job.logger.info(f"Added {captures_added} captures to source image collection {job.source_image_collection}")
+
+        job.progress.update_stage(
+            cls.key,
+            status=JobState.SUCCESS,
+            progress=1,
+            captures_added=captures_added,
+        )
+        job.finished_at = datetime.datetime.now()
+        # Defer persistence so status, progress and finished_at are written by one save.
+        job.update_status(JobState.SUCCESS, save=False)
+        job.update_progress(save=False)
+        job.save()
+
+
 class UnknownJobType(JobType):
     name = "Unknown"
     key = "unknown"
@@ -472,6 +535,32 @@ def run(cls, job: "Job"):
         job.save()
 
 
+# Job type classes that can be looked up by key and offered as choices for a job's type.
+VALID_JOB_TYPES = [MLJob, SourceImageCollectionPopulateJob, DataStorageSyncJob, UnknownJobType]
+
+
+def get_job_type_by_key(key: str) -> type[JobType] | None:
+    """
+    Return the first job type in VALID_JOB_TYPES whose key equals `key`, or None if none matches.
+    """
+    return next((candidate for candidate in VALID_JOB_TYPES if candidate.key == key), None)
+
+
+def get_job_type_by_inferred_key(job: "Job") -> type[JobType] | None:
+    """
+    Infer the job type from the job's attributes.
+
+    This is used for a data migration to set the job type of existing jobs
+    before the job type field was added to the model.
+
+    Returns MLJob when the job has a pipeline; otherwise the job type whose key
+    matches the key of the job's first progress stage; otherwise None.
+    """
+    if job.pipeline:
+        return MLJob
+
+    # A job with no progress, or with no stages, cannot be matched by stage key.
+    stages = job.progress.stages if job.progress else None
+    if not stages:
+        return None
+
+    # Check the key of the first stage in the job progress
+    return get_job_type_by_key(stages[0].key)
+
+
 class Job(BaseModel):
     """A job to be run by the scheduler"""
 
@@ -493,6 +582,9 @@ class Job(BaseModel):
         "Limit", null=True, blank=True, default=None, help_text="Limit the number of images to process"
     )
     shuffle = models.BooleanField("Shuffle", default=True, help_text="Process images in a random order")
+    job_type_key = models.CharField(
+        "Job Type", max_length=255, default=UnknownJobType.key, choices=[(t.key, t.name) for t in VALID_JOB_TYPES]
+    )
 
     project = models.ForeignKey(
         Project,
@@ -532,20 +624,15 @@ def __str__(self) -> str:
         return f'#{self.pk} "{self.name}" ({self.status})'
 
     def job_type(self) -> type[JobType]:
-        """
-        This is a temporary way to determine the type of job.
-        @TODO rework Job classes and background tasks.
-        """
-        if self.pipeline:
-            return MLJob
-
-        try:
-            self.progress.get_stage(DataStorageSyncJob.key)
-            return DataStorageSyncJob
-        except ValueError:
-            pass
-
-        return UnknownJobType
+        """
+        Return the job type class matching this job's `job_type_key`.
+
+        Raises ValueError when the key matches no known job type. The message
+        also names the inferred job type when one can be determined.
+        """
+        matched_type = get_job_type_by_key(self.job_type_key)
+        if matched_type:
+            return matched_type
+
+        message = f"Could not determine job type for job {self.pk} with job_type_key '{self.job_type_key}'. "
+        inferred_job_type = get_job_type_by_inferred_key(self)
+        if inferred_job_type:
+            message += f"Inferred job type as '{inferred_job_type.name}'"
+        raise ValueError(message)
 
     def enqueue(self):
         """
@@ -603,6 +690,19 @@ def run(self):
         job_type.run(job=self)
         return None
 
+    def retry(self, async_task=True):
+        """
+        Reset the job's progress, mark the job as RETRY, save it and run it again.
+
+        When `async_task` is true the job is enqueued; otherwise it is run directly.
+        """
+        self.logger.info(f"Re-running job {self}")
+        self.progress.reset()
+        self.status = JobState.RETRY
+        self.save()
+
+        rerun = self.enqueue if async_task else self.run
+        rerun()
+
     def cancel(self):
         """
         Terminate the celery task.
@@ -613,7 +713,6 @@ def cancel(self):
             task = run_job.AsyncResult(self.task_id)
             if task:
                 task.revoke(terminate=True)
-                self.status = task.status
                 self.save()
         else:
             self.status = JobState.REVOKED
@@ -646,7 +745,8 @@ def update_progress(self, save=True):
         Update the total aggregate progress from the progress of each stage.
         """
         if not len(self.progress.stages):
-            total_progress = 1
+            # Need at least one stage to calculate progress
+            total_progress = 0
         else:
             for stage in self.progress.stages:
                 if stage.progress > 0 and stage.status == JobState.CREATED:
@@ -674,11 +774,14 @@ def save(self, *args, **kwargs):
         """
         Create the job stages if they don't exist.
         """
-        if self.progress.stages:
+        if self.pk and self.progress.stages:
             self.update_progress(save=False)
         else:
             self.setup(save=False)
         super().save(*args, **kwargs)
+        logger.debug(f"Saved job {self}")
+        if self.progress.summary.status != self.status:
+            logger.warning(f"Job {self} status mismatches progress: {self.progress.summary.status} != {self.status}")
 
     @classmethod
     def default_progress(cls) -> JobProgress:
@@ -698,4 +801,3 @@ class Meta:
         # permissions = [
         #     ("run_job", "Can run a job"),
         #     ("cancel_job", "Can cancel a job"),
-        # ]