@@ -332,6 +332,18 @@ func (r *TensorFusionWorkloadReconciler) handlePodGPUCleanup(ctx context.Context
332
332
333
333
log .Info ("Processing pod with GPU resource cleanup finalizer" , "pod" , pod .Name )
334
334
335
+ pod .Annotations [constants .GpuReleasedAnnotation ] = shortuuid .New ()
336
+
337
+ // Update the annotation of the Pod to mark that GPU cleanup has been successfully processed.
338
+ // This is a key part of ensuring idempotency for the handlePodGPUCleanup function.
339
+ // If this function is called again for the same Pod instance (e.g., due to the client cache
340
+ // not yet reflecting the finalizer's removal), then this r.Update call will fail,
341
+ // so the GPU resources will not be released a second time.
342
+ if err := r .Update (ctx , pod ); err != nil {
343
+ log .Error (err , "Failed to mark that GPU cleanup of pod" )
344
+ return false , err
345
+ }
346
+
335
347
// read the GPU names from the pod annotations
336
348
gpuNamesStr , ok := pod .Annotations [constants .GpuKey ]
337
349
if ! ok {
@@ -355,17 +367,6 @@ func (r *TensorFusionWorkloadReconciler) handlePodGPUCleanup(ctx context.Context
355
367
if pod .Annotations == nil {
356
368
pod .Annotations = make (map [string ]string )
357
369
}
358
- pod .Annotations [constants .GpuReleasedAnnotation ] = shortuuid .New ()
359
-
360
- // Update the annotation of the Pod to mark that GPU cleanup has been successfully processed.
361
- // This is a key part of ensuring idempotency for the handlePodGPUCleanup function.
362
- // If this function is called again for the same Pod instance (e.g., due to the client cache
363
- // not yet reflecting the finalizer's removal), Then this r.Update pod will fail.
364
- // Will not cause duplicate releases
365
- if err := r .Update (ctx , pod ); err != nil {
366
- log .Error (err , "Failed to mark that GPU cleanup of pod" )
367
- return false , err
368
- }
369
370
370
371
return true , nil
371
372
}
0 commit comments