NexusGPU · Code2Life · Jun 3, 2025 · May 29, 2025 · May 29, 2025 · May 30, 2025
diff --git a/.gitignore b/.gitignore
@@ -12,6 +12,7 @@ Dockerfile.cross
 
 # Output of the go coverage tool, specifically when used with LiteIDE
 *.out
+cover.out.*
 
 # Go workspace file
 go.work

diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -55,7 +55,10 @@
             "type": "go",
             "request": "launch",
             "mode": "test",
-            "program": "${workspaceFolder}",
+            "env": {
+                "GO_TESTING": "true"
+            },
+            "program": "${workspaceFolder}/internal/controller",
             "console": "integratedTerminal"
         }
     ]

diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -1,12 +1,15 @@
 {
     "cSpell.words": [
+        "alertmanager",
         "alicloud",
         "Aliyun",
         "AMDCDNA",
         "AMDRDNA",
         "apimachinery",
+        "automount",
         "AWSGPU",
         "batchv",
+        "burstable",
         "CDNA",
         "certificaterequests",
         "certmanager",
@@ -39,6 +42,7 @@
         "greptime",
         "greptimedb",
         "healthz",
+        "iface",
         "karpenter",
         "kubebuilder",
         "KUBECONFIG",
@@ -51,6 +55,7 @@
         "NVML",
         "omitempty",
         "onsi",
+        "portallocator",
         "printcolumn",
         "prometheusagents",
         "prometheuses",
@@ -62,11 +67,13 @@
         "schedulingconfigtemplates",
         "schedulingcorev",
         "shirou",
+        "strategicpatches",
         "subresource",
         "tensorfusion",
         "tensorfusionaiv",
         "tensorfusioncluster",
         "tensorfusionclusters",
+        "tensorfusionworkload",
         "Tera",
         "tflops",
         "Tmpl",

diff --git a/Makefile b/Makefile
@@ -62,13 +62,8 @@ vet: ## Run go vet against code.
 
 .PHONY: test
 test: manifests generate fmt vet envtest ## Run tests.
-	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -timeout 0 -coverprofile cover.out
+	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" GO_TESTING=true go run github.com/onsi/ginkgo/v2/ginkgo -p -timeout 0 -cover -coverprofile cover.out -r --skip-file ./test/e2e
 
-# TODO(user): To use a different vendor for e2e tests, modify the setup under 'tests/e2e'.
-# The default setup assumes Kind is pre-installed and builds/loads the Manager Docker image locally.
-# Prometheus and CertManager are installed by default; skip with:
-# - PROMETHEUS_INSTALL_SKIP=true
-# - CERT_MANAGER_INSTALL_SKIP=true
 .PHONY: test-e2e
 test-e2e: manifests generate fmt vet ## Run the e2e tests. Expected an isolated environment using Kind.
 	@command -v kind >/dev/null 2>&1 || { \

diff --git a/api/v1/gpu_types.go b/api/v1/gpu_types.go
@@ -36,6 +36,18 @@ type GPUStatus struct {
 	GPUModel     string            `json:"gpuModel"`
 
 	Message string `json:"message"`
+
+	// +optional
+	RunningApps []*RunningAppDetail `json:"runningApps,omitempty"`
+}
+
+type RunningAppDetail struct {
+	// Workload name and namespace
+	Name      string `json:"name,omitempty"`
+	Namespace string `json:"namespace,omitempty"`
+
+	// Worker count
+	Count int `json:"count"`
 }
 
 // +kubebuilder:validation:Enum=Pending;Provisioning;Running;Unknown;Destroying;Migrating

diff --git a/api/v1/gpunode_funcs.go b/api/v1/gpunode_funcs.go
@@ -12,7 +12,7 @@ func (node *GPUNode) InitializeStatus(initTFlops, initVRAM resource.Quantity, in
 		TotalTFlops:         initTFlops,
 		TotalVRAM:           initVRAM,
 		TotalGPUs:           initGPUs,
-		AllocationDetails:   &[]GPUNodeAllocationDetails{},
+		AllocationInfo:      []*RunningAppDetail{},
 		LoadedModels:        &[]string{},
 		ManagedGPUDeviceIDs: []string{},
 		ObservedGeneration:  node.Generation,

diff --git a/api/v1/gpunode_types.go b/api/v1/gpunode_types.go
@@ -94,20 +94,8 @@ type GPUNodeStatus struct {
 
 	ObservedGeneration int64 `json:"observedGeneration,omitempty"`
 
-	// Allocation details is for node compaction, and calculate used apps
 	// +optional
-	AllocationDetails *[]GPUNodeAllocationDetails `json:"allocationDetails,omitempty"`
-}
-
-type GPUNodeAllocationDetails struct {
-	PodID        string `json:"podID,omitempty"`
-	PodName      string `json:"podName,omitempty"`
-	Namespace    string `json:"namespace"`
-	WorkloadName string `json:"workload,omitempty"`
-
-	Requests GPUResourceUnit `json:"requests"`
-	Limits   GPUResourceUnit `json:"limits"`
-	QoS      QoSLevel        `json:"qos,omitempty"`
+	AllocationInfo []*RunningAppDetail `json:"allocationInfo,omitempty"`
 }
 
 // +kubebuilder:validation:Enum=Pending;Provisioning;Migrating;Running;Succeeded;Failed;Unknown;Destroying

diff --git a/api/v1/gpupool_types.go b/api/v1/gpupool_types.go
@@ -293,7 +293,7 @@ type QosPricing struct {
 
 	Requests GPUResourcePricingUnit `json:"requests,omitempty"`
 
-	// Default requests and limitsOverRequests are same, indicates normal on-demand serverless GPU usage, in hands-on lab low QoS case, limitsOverRequests should be cheaper, for example Low QoS, ratio should be 0.5
+	// By default, requests and limitsOverRequests are charged at the same rate, which corresponds to normal on-demand serverless GPU usage. In the hands-on lab low-QoS case, limitsOverRequests should be lower so that users can get burstable GPU resources at very low cost.
 	// +kubebuilder:default="1"
 	LimitsOverRequestsChargingRatio string `json:"limitsOverRequests,omitempty"`
 }
@@ -372,6 +372,8 @@ type GPUPoolStatus struct {
 	AvailableTFlops resource.Quantity `json:"availableTFlops"`
 	AvailableVRAM   resource.Quantity `json:"availableVRAM"`
 
+	RunningAppsCnt int32 `json:"runningAppsCnt,omitempty"`
+
 	// +optional
 	VirtualAvailableTFlops *resource.Quantity `json:"virtualAvailableTFlops,omitempty"`
 	// +optional

diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go
diff --git a/charts/tensor-fusion/Chart.yaml b/charts/tensor-fusion/Chart.yaml
@@ -15,10 +15,10 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 1.2.22
+version: 1.3.2
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
 # follow Semantic Versioning. They should reflect the version the application is using.
 # It is recommended to use it with quotes.
-appVersion: "1.12.1"
+appVersion: "1.30.3"
diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_gpunodes.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_gpunodes.yaml
@@ -86,66 +86,19 @@ spec:
           status:
             description: GPUNodeStatus defines the observed state of GPUNode.
             properties:
-              allocationDetails:
-                description: Allocation details is for node compaction, and calculate
-                  used apps
+              allocationInfo:
                 items:
                   properties:
-                    limits:
-                      properties:
-                        tflops:
-                          anyOf:
-                          - type: integer
-                          - type: string
-                          description: Tera floating point operations per second
-                          pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                          x-kubernetes-int-or-string: true
-                        vram:
-                          anyOf:
-                          - type: integer
-                          - type: string
-                          description: VRAM is short for Video memory, namely GPU
-                            RAM
-                          pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                          x-kubernetes-int-or-string: true
-                      type: object
-                    namespace:
-                      type: string
-                    podID:
-                      type: string
-                    podName:
-                      type: string
-                    qos:
-                      enum:
-                      - low
-                      - medium
-                      - high
-                      - critical
+                    count:
+                      description: Worker count
+                      type: integer
+                    name:
+                      description: Workload name namespace
                       type: string
-                    requests:
-                      properties:
-                        tflops:
-                          anyOf:
-                          - type: integer
-                          - type: string
-                          description: Tera floating point operations per second
-                          pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                          x-kubernetes-int-or-string: true
-                        vram:
-                          anyOf:
-                          - type: integer
-                          - type: string
-                          description: VRAM is short for Video memory, namely GPU
-                            RAM
-                          pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
-                          x-kubernetes-int-or-string: true
-                      type: object
-                    workload:
+                    namespace:
                       type: string
                   required:
-                  - limits
-                  - namespace
-                  - requests
+                  - count
                   type: object
                 type: array
               availableTFlops:

diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_gpupools.yaml
@@ -546,7 +546,8 @@ spec:
                           description: Default requests and limitsOverRequests are
                             same, indicates normal on-demand serverless GPU usage,
                             in hands-on lab low QoS case, limitsOverRequests should
-                            be cheaper, for example Low QoS, ratio should be 0.5
+                            be lower, so that user can get burstable GPU resources
+                            with very low cost
                           type: string
                         qos:
                           enum:
@@ -704,6 +705,9 @@ spec:
               readyNodes:
                 format: int32
                 type: integer
+              runningAppsCnt:
+                format: int32
+                type: integer
               savedCostsPerMonth:
                 type: string
               totalGPUs:

diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_gpus.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_gpus.yaml
@@ -116,6 +116,21 @@ spec:
                 - Destroying
                 - Migrating
                 type: string
+              runningApps:
+                items:
+                  properties:
+                    count:
+                      description: Worker count
+                      type: integer
+                    name:
+                      description: Workload name namespace
+                      type: string
+                    namespace:
+                      type: string
+                  required:
+                  - count
+                  type: object
+                type: array
               uuid:
                 type: string
             required:

diff --git a/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml b/charts/tensor-fusion/crds/tensor-fusion.ai_tensorfusionclusters.yaml
@@ -650,8 +650,8 @@ spec:
                                     description: Default requests and limitsOverRequests
                                       are same, indicates normal on-demand serverless
                                       GPU usage, in hands-on lab low QoS case, limitsOverRequests
-                                      should be cheaper, for example Low QoS, ratio
-                                      should be 0.5
+                                      should be lower, so that user can get burstable
+                                      GPU resources with very low cost
                                     type: string
                                   qos:
                                     enum:

diff --git a/charts/tensor-fusion/templates/admission-webhooks/mutating-webhook.yaml b/charts/tensor-fusion/templates/admission-webhooks/mutating-webhook.yaml
@@ -22,6 +22,7 @@ webhooks:
     resources:
     - pods
   sideEffects: None
+  timeoutSeconds: 30
   objectSelector:
     matchExpressions:
       - key: tensor-fusion.ai/enabled
-Original file line number
+Diff line change
@@ Expand Up / @@ -12,6 +12,7 @@ Dockerfile.cross @@
     # Output of the go coverage tool, specifically when used with LiteIDE
     *.out
+    cover.out.*
     # Go workspace file
     go.work
@@ Expand Down @@