Skip to content

Commit 3f1e728

Browse files
authored
fix: skip pod creation when workload is being deleted (#213)
* feat: skip pod creation when workload is being deleted
* fix lint
1 parent d255e6c commit 3f1e728

File tree

1 file changed

+26
-4
lines changed

1 file changed

+26
-4
lines changed

internal/controller/tensorfusionworkload_controller.go

Lines changed: 26 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,11 @@ func (r *TensorFusionWorkloadReconciler) Reconcile(ctx context.Context, req ctrl
123123
return ctrl.Result{RequeueAfter: constants.PendingRequeueDuration}, nil
124124
}
125125

126+
if !workload.DeletionTimestamp.IsZero() {
127+
log.Info("Workload is being deleted, skipping pod creation", "name", workload.Name, "namespace", workload.Namespace)
128+
return ctrl.Result{}, nil
129+
}
130+
126131
// Fetch the GPUPool
127132
pool := &tfv1.GPUPool{}
128133
if err := r.Get(ctx, client.ObjectKey{Name: workload.Spec.PoolName}, pool); err != nil {
@@ -144,6 +149,27 @@ func (r *TensorFusionWorkloadReconciler) Reconcile(ctx context.Context, req ctrl
144149
}
145150
}
146151

152+
result, err := r.reconcileScaling(ctx, workload, podList, workerGenerator, podTemplateHash)
153+
if err != nil || !result.IsZero() {
154+
return result, err
155+
}
156+
157+
if err := r.updateStatus(ctx, workload, podList.Items, workerGenerator); err != nil {
158+
return ctrl.Result{}, err
159+
}
160+
161+
return ctrl.Result{}, nil
162+
}
163+
164+
// reconcileScaling handles scaling up and down of worker pods and updates replica status
165+
func (r *TensorFusionWorkloadReconciler) reconcileScaling(
166+
ctx context.Context,
167+
workload *tfv1.TensorFusionWorkload,
168+
podList *corev1.PodList,
169+
workerGenerator *worker.WorkerGenerator,
170+
podTemplateHash string,
171+
) (ctrl.Result, error) {
172+
log := log.FromContext(ctx)
147173
// Check if there are any Pods using the old podTemplateHash and delete them if any
148174
if len(podList.Items) > 0 {
149175
var outdatedPods []corev1.Pod
@@ -210,10 +236,6 @@ func (r *TensorFusionWorkloadReconciler) Reconcile(ctx context.Context, req ctrl
210236
}
211237
}
212238

213-
if err := r.updateStatus(ctx, workload, podList.Items, workerGenerator); err != nil {
214-
return ctrl.Result{}, err
215-
}
216-
217239
return ctrl.Result{}, nil
218240
}
219241

0 commit comments

Comments
 (0)