Skip to content

Commit 32813be

Browse files
Merge branch 'master' into feature/KO-427-local-pvc-cleanup
2 parents e59bc15 + 1b53dbe commit 32813be

File tree

3 files changed

+135
-70
lines changed

3 files changed

+135
-70
lines changed

internal/controller/cluster/pod.go

Lines changed: 66 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -117,15 +117,34 @@ func (r *SingleClusterReconciler) getRollingRestartTypeMap(rackState *RackState,
117117
return nil, nil, err
118118
}
119119

120-
// If EnableDynamicConfigUpdate is set and dynamic config command exec partially failed in previous try
121-
// then skip dynamic config update and fall back to rolling restart.
122-
// Continue with dynamic config update in case of Failed DynamicConfigUpdateStatus
123-
if asdbv1.GetBool(r.aeroCluster.Spec.EnableDynamicConfigUpdate) &&
124-
podStatus.DynamicConfigUpdateStatus != asdbv1.PartiallyFailed {
125-
// Fetching all dynamic config change.
126-
dynamicConfDiffPerPod[pods[idx].Name], err = r.handleDynamicConfigChange(rackState, pods[idx], version)
127-
if err != nil {
128-
return nil, nil, err
120+
specToStatusDiffs, err := getConfDiff(r.Log, rackState.Rack.AerospikeConfig.Value, pods[idx].Annotations, version)
121+
if err != nil {
122+
return nil, nil, err
123+
}
124+
125+
if len(specToStatusDiffs) != 0 {
126+
for key := range specToStatusDiffs {
127+
// To update in-memory namespace data-size, we need to restart the pod.
128+
// Just a warm restart is not enough.
129+
// https://support.aerospike.com/s/article/How-to-change-data-size-config-in-a-running-cluster
130+
if strings.HasSuffix(key, ".storage-engine.data-size") {
131+
restartTypeMap[pods[idx].Name] = mergeRestartType(restartTypeMap[pods[idx].Name], podRestart)
132+
133+
break
134+
}
135+
}
136+
137+
if restartTypeMap[pods[idx].Name] == podRestart {
138+
continue
139+
}
140+
141+
// If EnableDynamicConfigUpdate is set and dynamic config command exec partially failed in previous try
142+
// then skip dynamic config update and fall back to rolling restart.
143+
// Continue with dynamic config update in case of Failed DynamicConfigUpdateStatus
144+
if asdbv1.GetBool(r.aeroCluster.Spec.EnableDynamicConfigUpdate) &&
145+
podStatus.DynamicConfigUpdateStatus != asdbv1.PartiallyFailed &&
146+
isAllDynamicConfig(r.Log, specToStatusDiffs, version) {
147+
dynamicConfDiffPerPod[pods[idx].Name] = specToStatusDiffs
129148
}
130149
}
131150
}
@@ -1365,16 +1384,46 @@ func (r *SingleClusterReconciler) getConfigMap(rackID int) (*corev1.ConfigMap, e
13651384
return confMap, nil
13661385
}
13671386

1368-
func (r *SingleClusterReconciler) handleDynamicConfigChange(rackState *RackState, pod *corev1.Pod, version string) (
1369-
asconfig.DynamicConfigMap, error) {
1370-
statusFromAnnotation := pod.Annotations["aerospikeConf"]
1387+
// isAllDynamicConfig checks if all the configuration changes can be applied dynamically
1388+
func isAllDynamicConfig(log logger, specToStatusDiffs asconfig.DynamicConfigMap, version string) bool {
1389+
isDynamic, err := asconfig.IsAllDynamicConfig(log, specToStatusDiffs, version)
1390+
if err != nil {
1391+
log.Info("Failed to check if all config is dynamic, fallback to rolling restart", "error", err.Error())
1392+
return false
1393+
}
1394+
1395+
if !isDynamic {
1396+
log.Info("Config contains static field changes; dynamic update not possible.")
1397+
return false
1398+
}
1399+
1400+
return true
1401+
}
13711402

1372-
asConfStatus, err := getFlatConfig(r.Log, statusFromAnnotation)
1403+
func getFlatConfig(log logger, confStr string) (*asconfig.Conf, error) {
1404+
asConf, err := asconfig.NewASConfigFromBytes(log, []byte(confStr), asconfig.AeroConfig)
13731405
if err != nil {
13741406
return nil, fmt.Errorf("failed to load config map by lib: %v", err)
13751407
}
13761408

1377-
asConf, err := asconfig.NewMapAsConfig(r.Log, rackState.Rack.AerospikeConfig.Value)
1409+
return asConf.GetFlatMap(), nil
1410+
}
1411+
1412+
// getConfDiff retrieves the configuration differences between the spec and status Aerospike configurations.
1413+
func getConfDiff(log logger, specConfig map[string]interface{}, podAnnotations map[string]string,
1414+
version string) (asconfig.DynamicConfigMap, error) {
1415+
statusFromAnnotation, ok := podAnnotations["aerospikeConf"]
1416+
if !ok {
1417+
log.Info("Pod annotation 'aerospikeConf' missing")
1418+
return nil, nil
1419+
}
1420+
1421+
asConfStatus, err := getFlatConfig(log, statusFromAnnotation)
1422+
if err != nil {
1423+
return nil, fmt.Errorf("failed to load config map by lib: %v", err)
1424+
}
1425+
1426+
asConf, err := asconfig.NewMapAsConfig(log, specConfig)
13781427
if err != nil {
13791428
return nil, fmt.Errorf("failed to load config map by lib: %v", err)
13801429
}
@@ -1384,44 +1433,22 @@ func (r *SingleClusterReconciler) handleDynamicConfigChange(rackState *RackState
13841433
specConfFile = strings.ReplaceAll(specConfFile, "$${_DNE}{un}", "${un}")
13851434
specConfFile = strings.ReplaceAll(specConfFile, "$${_DNE}{dn}", "${dn}")
13861435

1387-
asConfSpec, err := getFlatConfig(r.Log, specConfFile)
1436+
asConfSpec, err := getFlatConfig(log, specConfFile)
13881437
if err != nil {
13891438
return nil, fmt.Errorf("failed to load config map by lib: %v", err)
13901439
}
13911440

1392-
specToStatusDiffs, err := asconfig.ConfDiff(r.Log, *asConfSpec, *asConfStatus,
1441+
specToStatusDiffs, err := asconfig.ConfDiff(log, *asConfSpec, *asConfStatus,
13931442
true, version)
13941443
if err != nil {
1395-
r.Log.Info("Failed to get config diff to change config dynamically, fallback to rolling restart",
1444+
log.Info("Failed to get config diff to change config dynamically, fallback to rolling restart",
13961445
"error", err.Error())
13971446
return nil, nil
13981447
}
13991448

1400-
if len(specToStatusDiffs) > 0 {
1401-
isDynamic, err := asconfig.IsAllDynamicConfig(r.Log, specToStatusDiffs, version)
1402-
if err != nil {
1403-
r.Log.Info("Failed to check if all config is dynamic, fallback to rolling restart", "error", err.Error())
1404-
return nil, nil
1405-
}
1406-
1407-
if !isDynamic {
1408-
r.Log.Info("Static field has been modified, cannot change config dynamically")
1409-
return nil, nil
1410-
}
1411-
}
1412-
14131449
return specToStatusDiffs, nil
14141450
}
14151451

1416-
func getFlatConfig(log logger, confStr string) (*asconfig.Conf, error) {
1417-
asConf, err := asconfig.NewASConfigFromBytes(log, []byte(confStr), asconfig.AeroConfig)
1418-
if err != nil {
1419-
return nil, fmt.Errorf("failed to load config map by lib: %v", err)
1420-
}
1421-
1422-
return asConf.GetFlatMap(), nil
1423-
}
1424-
14251452
func (r *SingleClusterReconciler) patchPodStatus(ctx context.Context, patches []jsonpatch.PatchOperation) error {
14261453
if len(patches) == 0 {
14271454
return nil

test/cleanup-test-namespace.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ for namespace in $namespaces; do
2323
echo "Removing Aerospike backup from namespace: $namespace"
2424
kubectl -n "$namespace" delete aerospikebackup --all
2525

26-
echo "Removing Aerospike backup service from namespace: $namespace"
27-
kubectl -n "$namespace" delete aerospikebackupservice --all
26+
echo "Removing Aerospike backup service from namespace: $namespace"
27+
kubectl -n "$namespace" delete aerospikebackupservice --all
2828

2929
# Force delete pods
3030
kubectl -n "$namespace" delete pod --selector 'app=aerospike-cluster' --grace-period=0 --force --ignore-not-found

test/cluster/dynamic_config_test.go

Lines changed: 67 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@ type podID struct {
3232
asdPID string
3333
}
3434

35-
const clName = "dynamic-config-test"
35+
const (
36+
clName = "dynamic-config-test"
37+
noRestart = "noRestart"
38+
)
3639

3740
var (
3841
configWithMaxDefaultVal = mapset.NewSet("info-max-ms", "flush-max-ms")
@@ -72,6 +75,8 @@ var _ = Describe(
7275
aeroCluster := createDummyAerospikeCluster(
7376
clusterNamespacedName, 2,
7477
)
78+
aeroCluster.Spec.AerospikeConfig.Value["namespaces"] = append(
79+
aeroCluster.Spec.AerospikeConfig.Value["namespaces"].([]interface{}), getNonSCInMemoryNamespaceConfig("mem"))
7580
aeroCluster.Spec.AerospikeConfig.Value["xdr"] = map[string]interface{}{
7681
"dcs": []map[string]interface{}{
7782
{
@@ -169,7 +174,7 @@ var _ = Describe(
169174

170175
By("Verify no warm/cold restarts in Pods")
171176

172-
validateServerRestart(ctx, aeroCluster, podPIDMap, false)
177+
validateServerRestart(ctx, aeroCluster, podPIDMap, noRestart)
173178

174179
By("Modify dynamic config by removing fields")
175180

@@ -215,14 +220,14 @@ var _ = Describe(
215220

216221
By("Verify no warm/cold restarts in Pods")
217222

218-
validateServerRestart(ctx, aeroCluster, podPIDMap, false)
223+
validateServerRestart(ctx, aeroCluster, podPIDMap, noRestart)
219224
},
220225
)
221226

222227
It(
223228
"Should update config statically", func() {
224229

225-
By("Modify static config")
230+
By("Modify static config to do warm restart")
226231

227232
aeroCluster, err := getCluster(
228233
k8sClient, ctx, clusterNamespacedName,
@@ -251,8 +256,41 @@ var _ = Describe(
251256
Expect(enableQuotas).To(BeTrue())
252257

253258
By("Verify warm restarts in Pods")
259+
validateServerRestart(ctx, aeroCluster, podPIDMap, asdbv1.OperationWarmRestart)
260+
261+
By("Modify static config to do pod restart")
262+
263+
aeroCluster, err = getCluster(
264+
k8sClient, ctx, clusterNamespacedName,
265+
)
266+
Expect(err).ToNot(HaveOccurred())
267+
268+
podPIDMap, err = getPodIDs(ctx, aeroCluster)
269+
Expect(err).ToNot(HaveOccurred())
270+
271+
nsList := aeroCluster.Spec.AerospikeConfig.Value["namespaces"].([]interface{})
272+
nsList[1].(map[string]interface{})["storage-engine"].(map[string]interface{})["data-size"] = 2073741824
273+
274+
err = updateCluster(k8sClient, ctx, aeroCluster)
275+
Expect(err).ToNot(HaveOccurred())
276+
277+
By("Fetch and verify static configs")
254278

255-
validateServerRestart(ctx, aeroCluster, podPIDMap, true)
279+
pod = aeroCluster.Status.Pods[aeroCluster.Name+"-0-0"]
280+
281+
info, err := requestInfoFromNode(logger, k8sClient, ctx, clusterNamespacedName, "namespace/mem", &pod)
282+
Expect(err).ToNot(HaveOccurred())
283+
284+
confs := strings.Split(info["namespace/mem"], ";")
285+
for _, conf := range confs {
286+
if strings.Contains(conf, "data-size") {
287+
keyValue := strings.Split(conf, "=")
288+
Expect(keyValue[1]).To(Equal("2073741824"))
289+
}
290+
}
291+
292+
By("Verify pod restarts in Pods")
293+
validateServerRestart(ctx, aeroCluster, podPIDMap, asdbv1.OperationPodRestart)
256294
},
257295
)
258296

@@ -290,7 +328,7 @@ var _ = Describe(
290328
Expect(cv).To(Equal(int64(19000)))
291329

292330
By("Verify warm restarts in Pods")
293-
validateServerRestart(ctx, aeroCluster, podPIDMap, true)
331+
validateServerRestart(ctx, aeroCluster, podPIDMap, asdbv1.OperationWarmRestart)
294332
},
295333
)
296334
},
@@ -357,8 +395,8 @@ var _ = Describe(
357395

358396
// As there was no full config update failure,
359397
// expectation is that pods will not be restarted.
360-
By("Verify cold restarts in Pods")
361-
validateServerRestart(ctx, aeroCluster, podPIDMap, false)
398+
By("Verify no restarts in Pods")
399+
validateServerRestart(ctx, aeroCluster, podPIDMap, noRestart)
362400
},
363401
)
364402

@@ -409,8 +447,14 @@ var _ = Describe(
409447
Expect(err).ToNot(HaveOccurred())
410448

411449
// As pods were failed, expectation is that pods will be cold restarted.
412-
By("Verify cold restarts in Pods")
413-
validateServerRestart(ctx, aeroCluster, podPIDMap, true)
450+
By("Verify partial cold restart in Pods")
451+
operationTypeMap := map[string]asdbv1.OperationKind{
452+
aeroCluster.Name + "-0-0": noRestart,
453+
aeroCluster.Name + "-0-1": asdbv1.OperationPodRestart,
454+
}
455+
456+
err = validateOperationTypes(ctx, aeroCluster, podPIDMap, operationTypeMap)
457+
Expect(err).ToNot(HaveOccurred())
414458

415459
},
416460
)
@@ -524,7 +568,7 @@ var _ = Describe(
524568
Expect(err).ToNot(HaveOccurred())
525569

526570
By("Verify no warm/cold restarts in Pods")
527-
validateServerRestart(ctx, aeroCluster, podPIDMap, false)
571+
validateServerRestart(ctx, aeroCluster, podPIDMap, noRestart)
528572

529573
By("Verify Network Context configs dynamically")
530574
err = validateNetworkContextDynamically(ctx, flatServer, flatSpec, aeroCluster, dynamic)
@@ -536,7 +580,7 @@ var _ = Describe(
536580
Expect(err).ToNot(HaveOccurred())
537581

538582
By("Verify no warm/cold restarts in Pods")
539-
validateServerRestart(ctx, aeroCluster, podPIDMap, false)
583+
validateServerRestart(ctx, aeroCluster, podPIDMap, noRestart)
540584

541585
By("Verify Namespace Context configs dynamically")
542586
err = validateNamespaceContextDynamically(ctx, flatServer, flatSpec, aeroCluster, dynamic)
@@ -548,7 +592,7 @@ var _ = Describe(
548592
Expect(err).ToNot(HaveOccurred())
549593

550594
By("Verify no warm/cold restarts in Pods")
551-
validateServerRestart(ctx, aeroCluster, podPIDMap, false)
595+
validateServerRestart(ctx, aeroCluster, podPIDMap, noRestart)
552596

553597
By("Verify Security Context configs dynamically")
554598
err = validateSecurityContextDynamically(ctx, flatServer, flatSpec, aeroCluster, dynamic)
@@ -560,7 +604,7 @@ var _ = Describe(
560604
Expect(err).ToNot(HaveOccurred())
561605

562606
By("Verify no warm/cold restarts in Pods")
563-
validateServerRestart(ctx, aeroCluster, podPIDMap, false)
607+
validateServerRestart(ctx, aeroCluster, podPIDMap, noRestart)
564608

565609
By("Verify XDR Context configs dynamically")
566610
err = validateXDRContextDynamically(clusterNamespacedName,
@@ -573,7 +617,7 @@ var _ = Describe(
573617
Expect(err).ToNot(HaveOccurred())
574618

575619
By("Verify no warm/cold restarts in Pods")
576-
validateServerRestart(ctx, aeroCluster, podPIDMap, false)
620+
validateServerRestart(ctx, aeroCluster, podPIDMap, noRestart)
577621
},
578622
)
579623
},
@@ -671,30 +715,24 @@ var _ = Describe(
671715
}
672716

673717
By("Verify no warm/cold restarts in Pods")
674-
675-
validateServerRestart(ctx, aeroCluster, podPIDMap, false)
718+
validateServerRestart(ctx, aeroCluster, podPIDMap, noRestart)
676719
},
677720
)
678721
},
679722
)
680723
},
681724
)
682725

683-
func validateServerRestart(ctx goctx.Context, cluster *asdbv1.AerospikeCluster, pidMap map[string]podID,
684-
shouldRestart bool) {
685-
restarted := false
726+
func validateServerRestart(ctx goctx.Context, aeroCluster *asdbv1.AerospikeCluster, pidMap map[string]podID,
727+
restartType asdbv1.OperationKind) {
728+
operationTypeMap := make(map[string]asdbv1.OperationKind)
686729

687-
newPodPidMap, err := getPodIDs(ctx, cluster)
688-
Expect(err).ToNot(HaveOccurred())
689-
690-
for podName, pid := range pidMap {
691-
if newPodPidMap[podName].podUID != pid.podUID || newPodPidMap[podName].asdPID != pid.asdPID {
692-
restarted = true
693-
break
694-
}
730+
for podName := range pidMap {
731+
operationTypeMap[podName] = restartType
695732
}
696733

697-
Expect(restarted).To(Equal(shouldRestart))
734+
err := validateOperationTypes(ctx, aeroCluster, pidMap, operationTypeMap)
735+
Expect(err).ToNot(HaveOccurred())
698736
}
699737

700738
func getPodIDs(ctx context.Context, aeroCluster *asdbv1.AerospikeCluster) (map[string]podID, error) {

0 commit comments

Comments
 (0)