@@ -159,6 +159,9 @@ type nfdMaster struct {
159
159
updaterPool * updaterPool
160
160
deniedNs
161
161
config * NFDConfig
162
+
163
+ // isLeader indicates if this instance is the leader, changing dynamically
164
+ isLeader bool
162
165
}
163
166
164
167
// NewNfdMaster creates a new NfdMaster server instance.
@@ -318,10 +321,11 @@ func (m *nfdMaster) Run() error {
318
321
// Run updater that handles events from the nfd CRD API.
319
322
if m .nfdController != nil {
320
323
if m .args .EnableLeaderElection {
321
- go m .nfdAPIUpdateHandlerWithLeaderElection ()
324
+ go m .startLeaderElectionHandler ()
322
325
} else {
323
- go m . nfdAPIUpdateHandler ()
326
+ m . isLeader = true
324
327
}
328
+ go m .nfdAPIUpdateHandler ()
325
329
}
326
330
327
331
// Start gRPC server for liveness probe (at this point we're "live")
@@ -394,6 +398,12 @@ func (m *nfdMaster) nfdAPIUpdateHandler() {
394
398
case nodeFeatureGroupName := <- m .nfdController .updateNodeFeatureGroupChan :
395
399
nodeFeatureGroup [nodeFeatureGroupName ] = struct {}{}
396
400
case <- rateLimit :
401
+ // If we're not the leader, don't do anything, sleep a bit longer
402
+ if ! m .isLeader {
403
+ rateLimit = time .After (5 * time .Second )
404
+ break
405
+ }
406
+
397
407
// NodeFeature
398
408
errUpdateAll := false
399
409
if updateAll {
@@ -1359,7 +1369,7 @@ func (m *nfdMaster) startNfdApiController() error {
1359
1369
return nil
1360
1370
}
1361
1371
1362
- func (m * nfdMaster ) nfdAPIUpdateHandlerWithLeaderElection () {
1372
+ func (m * nfdMaster ) startLeaderElectionHandler () {
1363
1373
ctx := context .Background ()
1364
1374
lock := & resourcelock.LeaseLock {
1365
1375
LeaseMeta : metav1.ObjectMeta {
@@ -1380,11 +1390,15 @@ func (m *nfdMaster) nfdAPIUpdateHandlerWithLeaderElection() {
1380
1390
RenewDeadline : m .config .LeaderElection .RenewDeadline .Duration ,
1381
1391
Callbacks : leaderelection.LeaderCallbacks {
1382
1392
OnStartedLeading : func (_ context.Context ) {
1383
- m .nfdAPIUpdateHandler ()
1393
+ m .isLeader = true
1384
1394
},
1385
1395
OnStoppedLeading : func () {
1386
1396
// We lost the lock.
1387
1397
klog .InfoS ("leaderelection lock was lost" )
1398
+ // We stop (i.e. exit), which makes sure that in-flight
1399
+ // requests/re-tries will be stopped. TODO: more graceful
1400
+ // handling that does not exit the pod (set m.isLeader to false
1401
+ // and flush the updater queues...)
1388
1402
m .Stop ()
1389
1403
},
1390
1404
},
0 commit comments