@@ -35,6 +35,20 @@ var workloadNameNs = tfv1.NameNamespace{Namespace: "default", Name: "test-worklo
var _ = Describe("GPU Allocator", func() {
	var allocator *GpuAllocator

+	allocateAndSync := func(poolName string, request tfv1.Resource, count uint, gpuModel string) ([]*tfv1.GPU, error) {
+		gpus, err := allocator.Alloc(ctx, poolName, workloadNameNs, request, count, gpuModel)
+		allocator.syncToK8s(ctx)
+		return gpus, err
+	}
+
+	deallocateAndSync := func(gpus []*tfv1.GPU, request tfv1.Resource) {
+		err := allocator.Dealloc(ctx, workloadNameNs, request, lo.Map(gpus, func(gpu *tfv1.GPU, _ int) types.NamespacedName {
+			return client.ObjectKeyFromObject(gpu)
+		}))
+		Expect(err).NotTo(HaveOccurred())
+		allocator.syncToK8s(ctx)
+	}
+
	BeforeEach(func() {
		allocator = NewGpuAllocator(ctx, k8sClient, 150*time.Millisecond)
		readyCh, err := allocator.SetupWithManager(ctx, mgr)
@@ -63,14 +77,10 @@ var _ = Describe("GPU Allocator", func() {
				Vram:   resource.MustParse("8Gi"),
			}

-			gpus, err := allocator.Alloc(ctx, "test-pool", workloadNameNs, request, 1, "")
+			gpus, err := allocateAndSync("test-pool", request, 1, "")
			Expect(err).NotTo(HaveOccurred())
			Expect(gpus).To(HaveLen(1))

-			// Explicitly call syncToK8s to persist changes before verification
-			allocator.syncToK8s(ctx)
-
-			// Explicitly refresh node capacity, simulate reconcile loop of GPUNode triggered
			gpuNode := &tfv1.GPUNode{}
			if err := k8sClient.Get(ctx, types.NamespacedName{Name: gpus[0].Labels[constants.LabelKeyOwner]}, gpuNode); err != nil {
				Expect(err).NotTo(HaveOccurred())
@@ -82,7 +92,7 @@ var _ = Describe("GPU Allocator", func() {
			_, _ = RefreshGPUNodeCapacity(ctx, k8sClient, gpuNode, pool)

			// Verify resources were reduced on the allocated GPU
-			gpu := getGPU(gpus[0].Name, gpus[0].Namespace)
+			gpu := getGPU(gpus[0].Name)
			Expect(gpu.Status.Available.Tflops.Cmp(gpu.Status.Capacity.Tflops)).To(Equal(-1))
			Expect(gpu.Status.Available.Vram.Cmp(gpu.Status.Capacity.Vram)).To(Equal(-1))

@@ -99,7 +109,7 @@ var _ = Describe("GPU Allocator", func() {
				Vram:   resource.MustParse("4Gi"),
			}

-			gpus, err := allocator.Alloc(ctx, "test-pool", workloadNameNs, request, 2, "")
+			gpus, err := allocateAndSync("test-pool", request, 2, "")
			Expect(err).NotTo(HaveOccurred())
			Expect(gpus).To(HaveLen(2))

@@ -116,7 +126,7 @@ var _ = Describe("GPU Allocator", func() {
				Vram:   resource.MustParse("2Gi"),
			}

-			_, err := allocator.Alloc(ctx, "test-pool", workloadNameNs, request, 10, "")
+			_, err := allocateAndSync("test-pool", request, 10, "")
			Expect(err).To(HaveOccurred())
		})

@@ -126,7 +136,7 @@ var _ = Describe("GPU Allocator", func() {
				Vram:   resource.MustParse("64Gi"),
			}

-			_, err := allocator.Alloc(ctx, "test-pool", workloadNameNs, request, 1, "")
+			_, err := allocateAndSync("test-pool", request, 1, "")
			Expect(err).To(HaveOccurred())
		})

@@ -136,7 +146,7 @@ var _ = Describe("GPU Allocator", func() {
				Vram:   resource.MustParse("2Gi"),
			}

-			_, err := allocator.Alloc(ctx, "nonexistent-pool", workloadNameNs, request, 1, "")
+			_, err := allocateAndSync("nonexistent-pool", request, 1, "")
			Expect(err).To(HaveOccurred())
		})

@@ -147,13 +157,12 @@ var _ = Describe("GPU Allocator", func() {
			}

			// Try allocating with a specific GPU model
-			gpus, err := allocator.Alloc(ctx, "test-pool", workloadNameNs, request, 1, "NVIDIA A100")
+			gpus, err := allocateAndSync("test-pool", request, 1, "NVIDIA A100")
			Expect(err).NotTo(HaveOccurred())
-			Expect(gpus).To(HaveLen(1))
			Expect(gpus[0].Status.GPUModel).To(Equal("NVIDIA A100"))

			// Try allocating with a non-existent GPU model
-			_, err = allocator.Alloc(ctx, "test-pool", workloadNameNs, request, 1, "NonExistentModel")
+			_, err = allocateAndSync("test-pool", request, 1, "NonExistentModel")
			Expect(err).To(HaveOccurred())
		})
	})
@@ -166,7 +175,7 @@ var _ = Describe("GPU Allocator", func() {
				Vram:   resource.MustParse("6Gi"),
			}

-			gpus, err := allocator.Alloc(ctx, "test-pool", workloadNameNs, request, 1, "")
+			gpus, err := allocateAndSync("test-pool", request, 1, "")
			Expect(err).NotTo(HaveOccurred())
			Expect(gpus).To(HaveLen(1))

@@ -176,13 +185,10 @@ var _ = Describe("GPU Allocator", func() {
			allocatedVram := allocatedGPU.Status.Available.Vram.DeepCopy()

			// Now deallocate
-			err = allocator.Dealloc(ctx, workloadNameNs, request, []types.NamespacedName{client.ObjectKeyFromObject(gpus[0])})
-			Expect(err).NotTo(HaveOccurred())
-
-			allocator.syncToK8s(ctx)
+			deallocateAndSync(gpus, request)

			// Verify resources were restored
-			deallocatedGPU := getGPU(allocatedGPU.Name, allocatedGPU.Namespace)
+			deallocatedGPU := getGPU(allocatedGPU.Name)
			expectedTflops := allocatedTflops.DeepCopy()
			expectedVram := allocatedVram.DeepCopy()
			expectedTflops.Add(request.Tflops)
@@ -201,7 +207,7 @@ var _ = Describe("GPU Allocator", func() {
			}

			// Allocate 2 GPUs
-			allocatedGPUs, err := allocator.Alloc(ctx, "test-pool", tfv1.NameNamespace{Namespace: "default", Name: "test-workload"}, request, 2, "")
+			allocatedGPUs, err := allocateAndSync("test-pool", request, 2, "")
			Expect(err).NotTo(HaveOccurred())
			Expect(allocatedGPUs).To(HaveLen(2))

@@ -230,23 +236,14 @@ var _ = Describe("GPU Allocator", func() {
					vram:   gpu.Status.Available.Vram.DeepCopy(),
				}
			}
-			gpusToDeallocKeys := lo.Map(gpusToDealloc, func(gpu *tfv1.GPU, _ int) types.NamespacedName {
-				return client.ObjectKeyFromObject(gpu)
-			})
+
			// Now deallocate all GPUs including the non-existent one
-			err = allocator.Dealloc(ctx, tfv1.NameNamespace{Namespace: "default", Name: "test-workload"}, request, gpusToDeallocKeys)
-			Expect(err).NotTo(HaveOccurred())
+			deallocateAndSync(gpusToDealloc, request)

			// Verify resources were restored for existing GPUs
			for _, allocatedGPU := range allocatedGPUs {
-				deallocatedGPU := getGPU(allocatedGPU.Name, allocatedGPU.Namespace)
+				deallocatedGPU := getGPU(allocatedGPU.Name)
				initialState := initialStates[allocatedGPU.Name]
-
-				expectedTflops := initialState.tflops.DeepCopy()
-				expectedVram := initialState.vram.DeepCopy()
-				expectedTflops.Add(request.Tflops)
-				expectedVram.Add(request.Vram)
-
				Expect(deallocatedGPU.Status.Available.Tflops.Cmp(initialState.tflops)).To(Equal(1))
				Expect(deallocatedGPU.Status.Available.Vram.Cmp(initialState.vram)).To(Equal(1))
			}
@@ -301,7 +298,7 @@ var _ = Describe("GPU Allocator", func() {
			Expect(exists).To(BeTrue())

			// Get the GPU from the API server
-			gpuToDelete := getGPU("gpu-1", "")
+			gpuToDelete := getGPU("gpu-1")

			// Handle the deletion event
			allocator.handleGPUDelete(ctx, gpuToDelete)