1 change: 1 addition & 0 deletions api/v1/gpu_types.go
@@ -22,6 +22,7 @@ import (
)

// GPUStatus defines the observed state of GPU.
// NOTE: When new fields added, remember to update syncGPUMetadataAndStatusFromCluster
type GPUStatus struct {
// +kubebuilder:default=Pending
Phase TensorFusionGPUPhase `json:"phase"`
4 changes: 3 additions & 1 deletion charts/tensor-fusion/crds/tensor-fusion.ai_gpus.yaml
@@ -65,7 +65,9 @@ spec:
metadata:
type: object
status:
description: GPUStatus defines the observed state of GPU.
description: |-
GPUStatus defines the observed state of GPU.
NOTE: When new fields added, remember to update syncGPUMetadataAndStatusFromCluster
properties:
available:
properties:
4 changes: 3 additions & 1 deletion config/crd/bases/tensor-fusion.ai_gpus.yaml
@@ -65,7 +65,9 @@ spec:
metadata:
type: object
status:
description: GPUStatus defines the observed state of GPU.
description: |-
GPUStatus defines the observed state of GPU.
NOTE: When new fields added, remember to update syncGPUMetadataAndStatusFromCluster
properties:
available:
properties:
3 changes: 3 additions & 0 deletions internal/gpuallocator/gpuallocator.go
@@ -1063,6 +1063,9 @@ func syncGPUMetadataAndStatusFromCluster(old *tfv1.GPU, gpu *tfv1.GPU) {
old.Status.NodeSelector = gpu.Status.NodeSelector
old.Status.GPUModel = gpu.Status.GPUModel
old.Status.UsedBy = gpu.Status.UsedBy
old.Status.Vendor = gpu.Status.Vendor
old.Status.NUMANode = gpu.Status.NUMANode
old.Status.Index = gpu.Status.Index
}

func (s *GpuAllocator) handleGPUUpdateCapacityDiff(old, gpu *tfv1.GPU) {
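
A minimal sketch (not code from this PR; types are simplified stand-ins for tfv1.GPU and tfv1.GPUStatus) of why the NOTE on GPUStatus matters: a field-by-field sync like syncGPUMetadataAndStatusFromCluster silently drops any status field that is not listed, so a guard check that fills every field and compares the result can catch a forgotten line such as the Vendor, NUMANode, or Index additions above.

package main

import (
	"fmt"
	"reflect"
)

// Simplified stand-ins; the real GPUStatus has more fields.
type GPUStatus struct {
	Phase    string
	GPUModel string
	Vendor   string
	NUMANode int
	Index    int
}

type GPU struct{ Status GPUStatus }

// syncStatus mirrors the shape of the field-by-field sync: every status
// field must be copied explicitly, or updates from the cluster are lost.
func syncStatus(old, latest *GPU) {
	old.Status.Phase = latest.Status.Phase
	old.Status.GPUModel = latest.Status.GPUModel
	old.Status.Vendor = latest.Status.Vendor
	old.Status.NUMANode = latest.Status.NUMANode
	old.Status.Index = latest.Status.Index
}

func main() {
	latest := &GPU{Status: GPUStatus{Phase: "Running", GPUModel: "H100", Vendor: "NVIDIA", NUMANode: 1, Index: 3}}
	cached := &GPU{}
	syncStatus(cached, latest)
	if !reflect.DeepEqual(cached.Status, latest.Status) {
		fmt.Println("sync dropped a field:", cached.Status)
		return
	}
	fmt.Println("all status fields propagated")
}
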
1 change: 1 addition & 0 deletions internal/utils/compose.go
@@ -185,6 +185,7 @@ func AppendTFWorkerLabelsAndAnnotationsAfterTemplate(
return strconv.Itoa(int(index))
}), ",")
}
annotations[constants.ComputingIsolationModeAnnotation] = string(workload.Spec.ComputeIsolation)
return labels, annotations
}

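
A small sketch, under assumptions, of how a downstream component might read back the compute-isolation mode that the webhook writes onto the worker pod. The literal annotation key and the "SharedCore" value below are hypothetical stand-ins; the real key is whatever constants.ComputingIsolationModeAnnotation resolves to.

package main

import "fmt"

// hypothetical stand-in for constants.ComputingIsolationModeAnnotation
const computingIsolationModeAnnotation = "tensor-fusion.ai/computing-isolation-mode"

type Pod struct{ Annotations map[string]string }

// isolationMode returns the annotated mode, if present.
func isolationMode(pod *Pod) (string, bool) {
	mode, ok := pod.Annotations[computingIsolationModeAnnotation]
	return mode, ok
}

func main() {
	pod := &Pod{Annotations: map[string]string{computingIsolationModeAnnotation: "SharedCore"}}
	if mode, ok := isolationMode(pod); ok {
		fmt.Println("compute isolation mode:", mode)
	}
}
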
16 changes: 4 additions & 12 deletions internal/webhook/v1/pod_webhook.go
@@ -125,8 +125,9 @@ func (m *TensorFusionPodMutator) Handle(ctx context.Context, req admission.Reque
if err := m.Client.Get(ctx, client.ObjectKey{Name: tfInfo.Profile.PoolName}, pool); err != nil {
return admission.Errored(http.StatusInternalServerError, fmt.Errorf("gpu pool(%s) does not exist", tfInfo.Profile.PoolName))
}
tfInfo.Profile.Qos = calculateQoSLevel(tfInfo.Profile, pool)

if workload, err := m.createOrUpdateWorkload(ctx, pod, &tfInfo, pool); err != nil {
if workload, err := m.createOrUpdateWorkload(ctx, pod, &tfInfo); err != nil {
return admission.Errored(http.StatusInternalServerError, fmt.Errorf("create tf workload: %w", err))
} else {
// Pod mutating webhook can not get Pod UID,
@@ -213,18 +214,9 @@ func (m *TensorFusionPodMutator) createOrUpdateWorkload(
ctx context.Context,
pod *corev1.Pod,
tfInfo *utils.TensorFusionInfo,
pool *tfv1.GPUPool) (*tfv1.TensorFusionWorkload, error) {
) (*tfv1.TensorFusionWorkload, error) {
// Create the desired spec for comparison
desiredSpec := tfv1.WorkloadProfileSpec{
Replicas: nil,
PoolName: tfInfo.Profile.PoolName,
Resources: tfInfo.Profile.Resources,
Qos: calculateQoSLevel(tfInfo.Profile, pool),
IsLocalGPU: tfInfo.Profile.IsLocalGPU,
GPUCount: tfInfo.Profile.GPUCount,
GPUModel: tfInfo.Profile.GPUModel,
AutoScalingConfig: tfInfo.Profile.AutoScalingConfig,
}
desiredSpec := *tfInfo.Profile.DeepCopy()

workload := &tfv1.TensorFusionWorkload{}
err := m.Client.Get(ctx, client.ObjectKey{Name: tfInfo.WorkloadName, Namespace: pod.Namespace}, workload)
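
A hedged sketch of the refactor above, using simplified stand-in types rather than the real tfv1 API: the QoS level is computed once on the profile before the workload is created, and the desired workload spec is then derived by deep-copying the whole profile instead of listing fields one by one, so newly added profile fields carry over automatically.

package main

import "fmt"

type Resources struct{ TflopsLimit, VramLimit string }

type Profile struct {
	PoolName  string
	Qos       string
	GPUCount  uint32
	Resources Resources
}

// DeepCopy stands in for the generated DeepCopy method on the real type.
// Profile has no pointer fields here, so a value copy is a full copy.
func (p *Profile) DeepCopy() *Profile {
	cp := *p
	return &cp
}

// calculateQoS stands in for calculateQoSLevel(profile, pool); the pool
// default used here is an assumption for illustration only.
func calculateQoS(p *Profile) string {
	if p.Qos != "" {
		return p.Qos
	}
	return "medium"
}

func main() {
	profile := &Profile{PoolName: "shared-pool", GPUCount: 1, Resources: Resources{TflopsLimit: "100", VramLimit: "16Gi"}}
	profile.Qos = calculateQoS(profile) // set once, before building the workload

	desiredSpec := *profile.DeepCopy() // every profile field carries over
	fmt.Printf("%+v\n", desiredSpec)
}
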
86 changes: 48 additions & 38 deletions internal/webhook/v1/tf_parser.go
@@ -181,21 +181,64 @@
}

func parseGPUResourcesAnnotations(pod *corev1.Pod, workloadProfile *tfv1.WorkloadProfile) error {
// extract any containers has GPU count limits and set to annotation
isMigratedFromContainerLimits := false
gpuCount, hasValue := pod.Annotations[constants.GpuCountAnnotation]
if hasValue {
val, err := strconv.ParseInt(gpuCount, 10, 32)
if err != nil {
return fmt.Errorf("invalid gpuCount value: %w", err)
}
workloadProfile.Spec.GPUCount = uint32(val)
} else if workloadProfile.Spec.GPUCount == 0 {
for _, container := range pod.Spec.Containers {
if quantity, ok := container.Resources.Limits[constants.NvidiaGPUKey]; ok {
gpuNumber, err := strconv.Atoi(quantity.String())
if err != nil || gpuNumber <= 0 {
ctrl.Log.Error(err, "unrecognized nvidia.com/gpu in resources, not a valid number", "pod", pod.Name, "container", container.Name)
} else {
workloadProfile.Spec.GPUCount = uint32(gpuNumber)
// For seamless migration with only one tensor-fusion.ai/enabled label
// and one tensor-fusion.ai/vram-limit annotation, convert this to 100% computing-percent
workloadProfile.Spec.Resources.Limits.ComputePercent = resource.MustParse("100")
isMigratedFromContainerLimits = true
break
}
}
}
}

if tflopsLimit, hasValue := parseResourceQuantity(pod, constants.TFLOPSLimitAnnotation); hasValue {
workloadProfile.Spec.Resources.Limits.Tflops = tflopsLimit
// clean compute percent limit when tflops limit is set in annotation
if isMigratedFromContainerLimits {
workloadProfile.Spec.Resources.Limits.ComputePercent = resource.Quantity{}
}
}
if vramLimit, hasValue := parseResourceQuantity(pod, constants.VRAMLimitAnnotation); hasValue {
workloadProfile.Spec.Resources.Limits.Vram = vramLimit
}

computeRequest, hasValue := parseResourceQuantity(pod, constants.ComputeLimitAnnotation)
if tflopsRequest, hasValue := parseResourceQuantity(pod, constants.TFLOPSRequestAnnotation); hasValue {
workloadProfile.Spec.Resources.Requests.Tflops = tflopsRequest
} else if workloadProfile.Spec.Resources.Requests.Tflops.IsZero() {
workloadProfile.Spec.Resources.Requests.Tflops = workloadProfile.Spec.Resources.Limits.Tflops
}
if vramRequest, hasValue := parseResourceQuantity(pod, constants.VRAMRequestAnnotation); hasValue {
workloadProfile.Spec.Resources.Requests.Vram = vramRequest
} else if workloadProfile.Spec.Resources.Requests.Vram.IsZero() {
workloadProfile.Spec.Resources.Requests.Vram = workloadProfile.Spec.Resources.Limits.Vram
}

// Percentage way to specify GPU resource request, not recommended, should use TFLOPs instead
computeLimit, hasValue := parseResourceQuantity(pod, constants.ComputeLimitAnnotation)
if hasValue {
workloadProfile.Spec.Resources.Limits.ComputePercent = computeRequest
workloadProfile.Spec.Resources.Limits.ComputePercent = computeLimit
}
computeLimit, hasValue := parseResourceQuantity(pod, constants.ComputeRequestAnnotation)
computeRequest, hasValue := parseResourceQuantity(pod, constants.ComputeRequestAnnotation)
if hasValue {
workloadProfile.Spec.Resources.Requests.ComputePercent = computeLimit
} else {
workloadProfile.Spec.Resources.Requests.ComputePercent = computeRequest
} else if workloadProfile.Spec.Resources.Requests.Tflops.IsZero() && workloadProfile.Spec.Resources.Requests.ComputePercent.IsZero() {
workloadProfile.Spec.Resources.Requests.ComputePercent = workloadProfile.Spec.Resources.Limits.ComputePercent
}

@@ -207,44 +250,11 @@
return fmt.Errorf("tflops- and computePercent limit are mutually exclusive, please specify only one")
}

if tflopsRequest, hasValue := parseResourceQuantity(pod, constants.TFLOPSRequestAnnotation); hasValue {
workloadProfile.Spec.Resources.Requests.Tflops = tflopsRequest
} else if workloadProfile.Spec.Resources.Requests.Tflops.IsZero() {
workloadProfile.Spec.Resources.Requests.Tflops = workloadProfile.Spec.Resources.Limits.Tflops
}
if vramRequest, hasValue := parseResourceQuantity(pod, constants.VRAMRequestAnnotation); hasValue {
workloadProfile.Spec.Resources.Requests.Vram = vramRequest
} else if workloadProfile.Spec.Resources.Requests.Vram.IsZero() {
workloadProfile.Spec.Resources.Requests.Vram = workloadProfile.Spec.Resources.Limits.Vram
}

qosLevel, hasValue := pod.Annotations[constants.QoSLevelAnnotation]
if hasValue {
workloadProfile.Spec.Qos = tfv1.QoSLevel(qosLevel)
}

// extract any containers has GPU count limits and set to annotation
gpuCount, hasValue := pod.Annotations[constants.GpuCountAnnotation]
if hasValue {
val, err := strconv.ParseInt(gpuCount, 10, 32)
if err != nil {
return fmt.Errorf("invalid gpuCount value: %w", err)
}
workloadProfile.Spec.GPUCount = uint32(val)
} else if workloadProfile.Spec.GPUCount == 0 {
for _, container := range pod.Spec.Containers {
if quantity, ok := container.Resources.Limits[constants.NvidiaGPUKey]; ok {
gpuNumber, err := strconv.Atoi(quantity.String())
if err != nil || gpuNumber <= 0 {
ctrl.Log.Error(err, "unrecognized nvidia.com/gpu in resources, not a valid number", "pod", pod.Name, "container", container.Name)
} else {
workloadProfile.Spec.GPUCount = uint32(gpuNumber)
break
}
}
}
}

gpuVendor, hasValue := pod.Annotations[constants.GpuVendorAnnotation]
if hasValue {
workloadProfile.Spec.GPUVendor = gpuVendor
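
A hedged sketch, with simplified types and stand-in annotation keys (not the real webhook code), of the parsing order the reworked parseGPUResourcesAnnotations follows: an explicit gpu-count annotation wins; otherwise a container nvidia.com/gpu limit is migrated into a GPU count plus a 100% compute-percent limit; an explicit TFLOPS limit then clears that migrated percent; requests fall back to limits when left unset.

package main

import (
	"fmt"
	"strconv"
)

type spec struct {
	GPUCount            uint32
	TflopsLimit         string
	ComputePercentLimit string
}

// parse applies the precedence: annotation > container nvidia.com/gpu limit,
// and an explicit TFLOPS limit overrides the migrated compute percent.
func parse(annotations map[string]string, containerGPULimit string, s *spec) {
	migrated := false
	if v, ok := annotations["gpu-count"]; ok { // stand-in for constants.GpuCountAnnotation
		if n, err := strconv.Atoi(v); err == nil {
			s.GPUCount = uint32(n)
		}
	} else if s.GPUCount == 0 && containerGPULimit != "" {
		if n, err := strconv.Atoi(containerGPULimit); err == nil && n > 0 {
			// migration path: nvidia.com/gpu limit -> GPU count + 100% compute percent
			s.GPUCount = uint32(n)
			s.ComputePercentLimit = "100"
			migrated = true
		}
	}
	if v, ok := annotations["tflops-limit"]; ok { // stand-in for constants.TFLOPSLimitAnnotation
		s.TflopsLimit = v
		if migrated {
			s.ComputePercentLimit = "" // explicit TFLOPS limit replaces the migrated percent
		}
	}
}

func main() {
	s := &spec{}
	parse(map[string]string{"tflops-limit": "80"}, "2", s)
	fmt.Printf("%+v\n", s) // GPUCount:2, TflopsLimit:80, migrated compute percent cleared
}
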