1 change: 1 addition & 0 deletions api/v1/gpu_types.go
@@ -22,6 +22,7 @@ import (
)

// GPUStatus defines the observed state of GPU.
// NOTE: When new fields added, remember to update syncGPUMetadataAndStatusFromCluster
type GPUStatus struct {
// +kubebuilder:default=Pending
Phase TensorFusionGPUPhase `json:"phase"`
4 changes: 3 additions & 1 deletion charts/tensor-fusion/crds/tensor-fusion.ai_gpus.yaml
@@ -65,7 +65,9 @@ spec:
metadata:
type: object
status:
description: GPUStatus defines the observed state of GPU.
description: |-
GPUStatus defines the observed state of GPU.
NOTE: When new fields added, remember to update syncGPUMetadataAndStatusFromCluster
properties:
available:
properties:
4 changes: 3 additions & 1 deletion config/crd/bases/tensor-fusion.ai_gpus.yaml
@@ -65,7 +65,9 @@ spec:
metadata:
type: object
status:
description: GPUStatus defines the observed state of GPU.
description: |-
GPUStatus defines the observed state of GPU.
NOTE: When new fields added, remember to update syncGPUMetadataAndStatusFromCluster
properties:
available:
properties:
3 changes: 3 additions & 0 deletions internal/gpuallocator/gpuallocator.go
@@ -1063,6 +1063,9 @@ func syncGPUMetadataAndStatusFromCluster(old *tfv1.GPU, gpu *tfv1.GPU) {
old.Status.NodeSelector = gpu.Status.NodeSelector
old.Status.GPUModel = gpu.Status.GPUModel
old.Status.UsedBy = gpu.Status.UsedBy
old.Status.Vendor = gpu.Status.Vendor
old.Status.NUMANode = gpu.Status.NUMANode
old.Status.Index = gpu.Status.Index
}

func (s *GpuAllocator) handleGPUUpdateCapacityDiff(old, gpu *tfv1.GPU) {
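
A minimal sketch (not code from this PR; types are simplified stand-ins for tfv1.GPU and tfv1.GPUStatus) of why the NOTE on GPUStatus matters: a field-by-field sync like syncGPUMetadataAndStatusFromCluster silently drops any status field that is not listed, so a guard check that fills every field and compares the result can catch a forgotten line such as the Vendor, NUMANode, or Index additions above.

package main

import (
	"fmt"
	"reflect"
)

// Simplified stand-ins; the real GPUStatus has more fields.
type GPUStatus struct {
	Phase    string
	GPUModel string
	Vendor   string
	NUMANode int
	Index    int
}

type GPU struct{ Status GPUStatus }

// syncStatus mirrors the shape of the field-by-field sync: every status
// field must be copied explicitly, or updates from the cluster are lost.
func syncStatus(old, latest *GPU) {
	old.Status.Phase = latest.Status.Phase
	old.Status.GPUModel = latest.Status.GPUModel
	old.Status.Vendor = latest.Status.Vendor
	old.Status.NUMANode = latest.Status.NUMANode
	old.Status.Index = latest.Status.Index
}

func main() {
	latest := &GPU{Status: GPUStatus{Phase: "Running", GPUModel: "H100", Vendor: "NVIDIA", NUMANode: 1, Index: 3}}
	cached := &GPU{}
	syncStatus(cached, latest)
	if !reflect.DeepEqual(cached.Status, latest.Status) {
		fmt.Println("sync dropped a field:", cached.Status)
		return
	}
	fmt.Println("all status fields propagated")
}
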
1 change: 1 addition & 0 deletions internal/utils/compose.go
@@ -185,6 +185,7 @@ func AppendTFWorkerLabelsAndAnnotationsAfterTemplate(
return strconv.Itoa(int(index))
}), ",")
}
annotations[constants.ComputingIsolationModeAnnotation] = string(workload.Spec.ComputeIsolation)
return labels, annotations
}

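
A small sketch, under assumptions, of how a downstream component might read back the compute-isolation mode that the webhook writes onto the worker pod. The literal annotation key and the "SharedCore" value below are hypothetical stand-ins; the real key is whatever constants.ComputingIsolationModeAnnotation resolves to.

package main

import "fmt"

// hypothetical stand-in for constants.ComputingIsolationModeAnnotation
const computingIsolationModeAnnotation = "tensor-fusion.ai/computing-isolation-mode"

type Pod struct{ Annotations map[string]string }

// isolationMode returns the annotated mode, if present.
func isolationMode(pod *Pod) (string, bool) {
	mode, ok := pod.Annotations[computingIsolationModeAnnotation]
	return mode, ok
}

func main() {
	pod := &Pod{Annotations: map[string]string{computingIsolationModeAnnotation: "SharedCore"}}
	if mode, ok := isolationMode(pod); ok {
		fmt.Println("compute isolation mode:", mode)
	}
}
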
16 changes: 4 additions & 12 deletions internal/webhook/v1/pod_webhook.go
@@ -125,8 +125,9 @@ func (m *TensorFusionPodMutator) Handle(ctx context.Context, req admission.Reque
if err := m.Client.Get(ctx, client.ObjectKey{Name: tfInfo.Profile.PoolName}, pool); err != nil {
return admission.Errored(http.StatusInternalServerError, fmt.Errorf("gpu pool(%s) does not exist", tfInfo.Profile.PoolName))
}
tfInfo.Profile.Qos = calculateQoSLevel(tfInfo.Profile, pool)

if workload, err := m.createOrUpdateWorkload(ctx, pod, &tfInfo, pool); err != nil {
if workload, err := m.createOrUpdateWorkload(ctx, pod, &tfInfo); err != nil {
return admission.Errored(http.StatusInternalServerError, fmt.Errorf("create tf workload: %w", err))
} else {
// Pod mutating webhook can not get Pod UID,
@@ -213,18 +214,9 @@ func (m *TensorFusionPodMutator) createOrUpdateWorkload(
ctx context.Context,
pod *corev1.Pod,
tfInfo *utils.TensorFusionInfo,
pool *tfv1.GPUPool) (*tfv1.TensorFusionWorkload, error) {
) (*tfv1.TensorFusionWorkload, error) {
// Create the desired spec for comparison
desiredSpec := tfv1.WorkloadProfileSpec{
Replicas: nil,
PoolName: tfInfo.Profile.PoolName,
Resources: tfInfo.Profile.Resources,
Qos: calculateQoSLevel(tfInfo.Profile, pool),
IsLocalGPU: tfInfo.Profile.IsLocalGPU,
GPUCount: tfInfo.Profile.GPUCount,
GPUModel: tfInfo.Profile.GPUModel,
AutoScalingConfig: tfInfo.Profile.AutoScalingConfig,
}
desiredSpec := *tfInfo.Profile.DeepCopy()

workload := &tfv1.TensorFusionWorkload{}
err := m.Client.Get(ctx, client.ObjectKey{Name: tfInfo.WorkloadName, Namespace: pod.Namespace}, workload)
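
A hedged sketch of the refactor above, using simplified stand-in types rather than the real tfv1 API: the QoS level is computed once on the profile before the workload is created, and the desired workload spec is then derived by deep-copying the whole profile instead of listing fields one by one, so newly added profile fields carry over automatically.

package main

import "fmt"

type Resources struct{ TflopsLimit, VramLimit string }

type Profile struct {
	PoolName  string
	Qos       string
	GPUCount  uint32
	Resources Resources
}

// DeepCopy stands in for the generated DeepCopy method on the real type.
// Profile has no pointer fields here, so a value copy is a full copy.
func (p *Profile) DeepCopy() *Profile {
	cp := *p
	return &cp
}

// calculateQoS stands in for calculateQoSLevel(profile, pool); the pool
// default used here is an assumption for illustration only.
func calculateQoS(p *Profile) string {
	if p.Qos != "" {
		return p.Qos
	}
	return "medium"
}

func main() {
	profile := &Profile{PoolName: "shared-pool", GPUCount: 1, Resources: Resources{TflopsLimit: "100", VramLimit: "16Gi"}}
	profile.Qos = calculateQoS(profile) // set once, before building the workload

	desiredSpec := *profile.DeepCopy() // every profile field carries over
	fmt.Printf("%+v\n", desiredSpec)
}
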
86 changes: 48 additions & 38 deletions internal/webhook/v1/tf_parser.go
@@ -181,21 +181,64 @@
}

func parseGPUResourcesAnnotations(pod *corev1.Pod, workloadProfile *tfv1.WorkloadProfile) error {
// extract any containers has GPU count limits and set to annotation
isMigratedFromContainerLimits := false
gpuCount, hasValue := pod.Annotations[constants.GpuCountAnnotation]
if hasValue {
val, err := strconv.ParseInt(gpuCount, 10, 32)
if err != nil {
return fmt.Errorf("invalid gpuCount value: %w", err)
}
workloadProfile.Spec.GPUCount = uint32(val)
} else if workloadProfile.Spec.GPUCount == 0 {
for _, container := range pod.Spec.Containers {
if quantity, ok := container.Resources.Limits[constants.NvidiaGPUKey]; ok {
gpuNumber, err := strconv.Atoi(quantity.String())
if err != nil || gpuNumber <= 0 {
ctrl.Log.Error(err, "unrecognized nvidia.com/gpu in resources, not a valid number", "pod", pod.Name, "container", container.Name)
} else {
workloadProfile.Spec.GPUCount = uint32(gpuNumber)
// For seamless migration with only one tensor-fusion.ai/enabled label
// and one tensor-fusion.ai/vram-limit annotation, convert this to 100% computing-percent
workloadProfile.Spec.Resources.Limits.ComputePercent = resource.MustParse("100")
isMigratedFromContainerLimits = true
break
}
}
}
}

if tflopsLimit, hasValue := parseResourceQuantity(pod, constants.TFLOPSLimitAnnotation); hasValue {
workloadProfile.Spec.Resources.Limits.Tflops = tflopsLimit
// clean compute percent limit when tflops limit is set in annotation
if isMigratedFromContainerLimits {
workloadProfile.Spec.Resources.Limits.ComputePercent = resource.Quantity{}
}
}
if vramLimit, hasValue := parseResourceQuantity(pod, constants.VRAMLimitAnnotation); hasValue {
workloadProfile.Spec.Resources.Limits.Vram = vramLimit
}

computeRequest, hasValue := parseResourceQuantity(pod, constants.ComputeLimitAnnotation)
if tflopsRequest, hasValue := parseResourceQuantity(pod, constants.TFLOPSRequestAnnotation); hasValue {
workloadProfile.Spec.Resources.Requests.Tflops = tflopsRequest
} else if workloadProfile.Spec.Resources.Requests.Tflops.IsZero() {
workloadProfile.Spec.Resources.Requests.Tflops = workloadProfile.Spec.Resources.Limits.Tflops
}
if vramRequest, hasValue := parseResourceQuantity(pod, constants.VRAMRequestAnnotation); hasValue {
workloadProfile.Spec.Resources.Requests.Vram = vramRequest
} else if workloadProfile.Spec.Resources.Requests.Vram.IsZero() {
workloadProfile.Spec.Resources.Requests.Vram = workloadProfile.Spec.Resources.Limits.Vram
}

// Percentage way to specify GPU resource request, not recommended, should use TFLOPs instead
computeLimit, hasValue := parseResourceQuantity(pod, constants.ComputeLimitAnnotation)
if hasValue {
workloadProfile.Spec.Resources.Limits.ComputePercent = computeRequest
workloadProfile.Spec.Resources.Limits.ComputePercent = computeLimit
}
computeLimit, hasValue := parseResourceQuantity(pod, constants.ComputeRequestAnnotation)
computeRequest, hasValue := parseResourceQuantity(pod, constants.ComputeRequestAnnotation)
if hasValue {
workloadProfile.Spec.Resources.Requests.ComputePercent = computeLimit
} else {
workloadProfile.Spec.Resources.Requests.ComputePercent = computeRequest
} else if workloadProfile.Spec.Resources.Requests.Tflops.IsZero() && workloadProfile.Spec.Resources.Requests.ComputePercent.IsZero() {
workloadProfile.Spec.Resources.Requests.ComputePercent = workloadProfile.Spec.Resources.Limits.ComputePercent
}

@@ -207,44 +250,11 @@
return fmt.Errorf("tflops- and computePercent limit are mutually exclusive, please specify only one")
}

if tflopsRequest, hasValue := parseResourceQuantity(pod, constants.TFLOPSRequestAnnotation); hasValue {
workloadProfile.Spec.Resources.Requests.Tflops = tflopsRequest
} else if workloadProfile.Spec.Resources.Requests.Tflops.IsZero() {
workloadProfile.Spec.Resources.Requests.Tflops = workloadProfile.Spec.Resources.Limits.Tflops
}
if vramRequest, hasValue := parseResourceQuantity(pod, constants.VRAMRequestAnnotation); hasValue {
workloadProfile.Spec.Resources.Requests.Vram = vramRequest
} else if workloadProfile.Spec.Resources.Requests.Vram.IsZero() {
workloadProfile.Spec.Resources.Requests.Vram = workloadProfile.Spec.Resources.Limits.Vram
}

qosLevel, hasValue := pod.Annotations[constants.QoSLevelAnnotation]
if hasValue {
workloadProfile.Spec.Qos = tfv1.QoSLevel(qosLevel)
}

// extract any containers has GPU count limits and set to annotation
gpuCount, hasValue := pod.Annotations[constants.GpuCountAnnotation]
if hasValue {
val, err := strconv.ParseInt(gpuCount, 10, 32)
if err != nil {
return fmt.Errorf("invalid gpuCount value: %w", err)
}
workloadProfile.Spec.GPUCount = uint32(val)
} else if workloadProfile.Spec.GPUCount == 0 {
for _, container := range pod.Spec.Containers {
if quantity, ok := container.Resources.Limits[constants.NvidiaGPUKey]; ok {
gpuNumber, err := strconv.Atoi(quantity.String())
if err != nil || gpuNumber <= 0 {
ctrl.Log.Error(err, "unrecognized nvidia.com/gpu in resources, not a valid number", "pod", pod.Name, "container", container.Name)
} else {
workloadProfile.Spec.GPUCount = uint32(gpuNumber)
break
}
}
}
}

gpuVendor, hasValue := pod.Annotations[constants.GpuVendorAnnotation]
if hasValue {
workloadProfile.Spec.GPUVendor = gpuVendor
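
A hedged sketch, with simplified types and stand-in annotation keys (not the real webhook code), of the parsing order the reworked parseGPUResourcesAnnotations follows: an explicit gpu-count annotation wins; otherwise a container nvidia.com/gpu limit is migrated into a GPU count plus a 100% compute-percent limit; an explicit TFLOPS limit then clears that migrated percent; requests fall back to limits when left unset.

package main

import (
	"fmt"
	"strconv"
)

type spec struct {
	GPUCount            uint32
	TflopsLimit         string
	ComputePercentLimit string
}

// parse applies the precedence: annotation > container nvidia.com/gpu limit,
// and an explicit TFLOPS limit overrides the migrated compute percent.
func parse(annotations map[string]string, containerGPULimit string, s *spec) {
	migrated := false
	if v, ok := annotations["gpu-count"]; ok { // stand-in for constants.GpuCountAnnotation
		if n, err := strconv.Atoi(v); err == nil {
			s.GPUCount = uint32(n)
		}
	} else if s.GPUCount == 0 && containerGPULimit != "" {
		if n, err := strconv.Atoi(containerGPULimit); err == nil && n > 0 {
			// migration path: nvidia.com/gpu limit -> GPU count + 100% compute percent
			s.GPUCount = uint32(n)
			s.ComputePercentLimit = "100"
			migrated = true
		}
	}
	if v, ok := annotations["tflops-limit"]; ok { // stand-in for constants.TFLOPSLimitAnnotation
		s.TflopsLimit = v
		if migrated {
			s.ComputePercentLimit = "" // explicit TFLOPS limit replaces the migrated percent
		}
	}
}

func main() {
	s := &spec{}
	parse(map[string]string{"tflops-limit": "80"}, "2", s)
	fmt.Printf("%+v\n", s) // GPUCount:2, TflopsLimit:80, migrated compute percent cleared
}
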