Skip to content

Commit 4509dd6

Browse files
authored
Merge branch 'main' into dev-joey-05.28
2 parents ca256b8 + 5cbfd14 commit 4509dd6

File tree

1 file changed

+12
-11
lines changed

1 file changed

+12
-11
lines changed

internal/controller/tensorfusionworkload_controller.go

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,18 @@ func (r *TensorFusionWorkloadReconciler) handlePodGPUCleanup(ctx context.Context
332332

333333
log.Info("Processing pod with GPU resource cleanup finalizer", "pod", pod.Name)
334334

335+
pod.Annotations[constants.GpuReleasedAnnotation] = shortuuid.New()
336+
337+
// Update the annotation of the Pod to mark that GPU cleanup has been successfully processed.
338+
// This is a key part of ensuring idempotency for the handlePodGPUCleanup function.
339+
// If this function is called again for the same Pod instance (e.g., due to the client cache
340+
// not yet reflecting the finalizer's removal), then this r.Update call will fail,
341+
// so the GPU resources will not be released a second time.
342+
if err := r.Update(ctx, pod); err != nil {
343+
log.Error(err, "Failed to mark that GPU cleanup of pod")
344+
return false, err
345+
}
346+
335347
// read the GPU names from the pod annotations
336348
gpuNamesStr, ok := pod.Annotations[constants.GpuKey]
337349
if !ok {
@@ -355,17 +367,6 @@ func (r *TensorFusionWorkloadReconciler) handlePodGPUCleanup(ctx context.Context
355367
if pod.Annotations == nil {
356368
pod.Annotations = make(map[string]string)
357369
}
358-
pod.Annotations[constants.GpuReleasedAnnotation] = shortuuid.New()
359-
360-
// Update the annotation of the Pod to mark that GPU cleanup has been successfully processed.
361-
// This is a key part of ensuring idempotency for the handlePodGPUCleanup function.
362-
// If this function is called again for the same Pod instance (e.g., due to the client cache
363-
// not yet reflecting the finalizer's removal), then this r.Update call will fail,
364-
// so the GPU resources will not be released a second time.
365-
if err := r.Update(ctx, pod); err != nil {
366-
log.Error(err, "Failed to mark that GPU cleanup of pod")
367-
return false, err
368-
}
369370

370371
return true, nil
371372
}

0 commit comments

Comments
 (0)