Skip to content

Commit 1ff919c

Browse files
authored
Merge pull request #2135 from marquiz/release-0.17
[release-0.17] nfd-master: fix memory leak when leader election is enabled
2 parents 8bf4553 + 11401f6 commit 1ff919c

File tree

1 file changed

+18
-4
lines changed

1 file changed

+18
-4
lines changed

pkg/nfd-master/nfd-master.go

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,9 @@ type nfdMaster struct {
159159
updaterPool *updaterPool
160160
deniedNs
161161
config *NFDConfig
162+
163+
// isLeader indicates if this instance is the leader, changing dynamically
164+
isLeader bool
162165
}
163166

164167
// NewNfdMaster creates a new NfdMaster server instance.
@@ -318,10 +321,11 @@ func (m *nfdMaster) Run() error {
318321
// Run updater that handles events from the nfd CRD API.
319322
if m.nfdController != nil {
320323
if m.args.EnableLeaderElection {
321-
go m.nfdAPIUpdateHandlerWithLeaderElection()
324+
go m.startLeaderElectionHandler()
322325
} else {
323-
go m.nfdAPIUpdateHandler()
326+
m.isLeader = true
324327
}
328+
go m.nfdAPIUpdateHandler()
325329
}
326330

327331
// Start gRPC server for liveness probe (at this point we're "live")
@@ -394,6 +398,12 @@ func (m *nfdMaster) nfdAPIUpdateHandler() {
394398
case nodeFeatureGroupName := <-m.nfdController.updateNodeFeatureGroupChan:
395399
nodeFeatureGroup[nodeFeatureGroupName] = struct{}{}
396400
case <-rateLimit:
401+
// If we're not the leader, don't do anything; just sleep a bit longer
402+
if !m.isLeader {
403+
rateLimit = time.After(5 * time.Second)
404+
break
405+
}
406+
397407
// NodeFeature
398408
errUpdateAll := false
399409
if updateAll {
@@ -1359,7 +1369,7 @@ func (m *nfdMaster) startNfdApiController() error {
13591369
return nil
13601370
}
13611371

1362-
func (m *nfdMaster) nfdAPIUpdateHandlerWithLeaderElection() {
1372+
func (m *nfdMaster) startLeaderElectionHandler() {
13631373
ctx := context.Background()
13641374
lock := &resourcelock.LeaseLock{
13651375
LeaseMeta: metav1.ObjectMeta{
@@ -1380,11 +1390,15 @@ func (m *nfdMaster) nfdAPIUpdateHandlerWithLeaderElection() {
13801390
RenewDeadline: m.config.LeaderElection.RenewDeadline.Duration,
13811391
Callbacks: leaderelection.LeaderCallbacks{
13821392
OnStartedLeading: func(_ context.Context) {
1383-
m.nfdAPIUpdateHandler()
1393+
m.isLeader = true
13841394
},
13851395
OnStoppedLeading: func() {
13861396
// We lost the lock.
13871397
klog.InfoS("leaderelection lock was lost")
1398+
// We stop (i.e. exit) to make sure that in-flight
1399+
// requests/retries will be stopped. TODO: more graceful
1400+
// handling that does not exit the pod (set m.isLeader to false
1401+
// and flush the updater queues...)
13881402
m.Stop()
13891403
},
13901404
},

0 commit comments

Comments
 (0)