Skip to content

feat: add support for local PVC cleanup during upgrade and rolling restart [KO-427] #374

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/v1/aerospikecluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,11 @@ type AerospikeStorageSpec struct { //nolint:govet // for readability
// +optional
LocalStorageClasses []string `json:"localStorageClasses,omitempty"`

// DeleteLocalStorageOnRestart enables the deletion of local storage PVCs when a pod is restarted or rescheduled
// by AKO. It only considers local storage classes given in the localStorageClasses field.
// +optional
DeleteLocalStorageOnRestart *bool `json:"deleteLocalStorageOnRestart,omitempty"`

// Volumes list to attach to created pods.
// +patchMergeKey=name
// +patchStrategy=merge
Expand Down
5 changes: 5 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6221,6 +6221,11 @@ spec:
description: CleanupThreads contains the maximum number
of cleanup threads(dd or blkdiscard) per init container.
type: integer
deleteLocalStorageOnRestart:
description: |-
DeleteLocalStorageOnRestart enables the deletion of local storage PVCs when a pod is restarted or rescheduled
by AKO. It only considers local storage classes given in the localStorageClasses field.
type: boolean
filesystemVolumePolicy:
description: FileSystemVolumePolicy contains default
policies for filesystem volumes.
Expand Down Expand Up @@ -7822,6 +7827,11 @@ spec:
description: CleanupThreads contains the maximum number
of cleanup threads(dd or blkdiscard) per init container.
type: integer
deleteLocalStorageOnRestart:
description: |-
DeleteLocalStorageOnRestart enables the deletion of local storage PVCs when a pod is restarted or rescheduled
by AKO. It only considers local storage classes given in the localStorageClasses field.
type: boolean
filesystemVolumePolicy:
description: FileSystemVolumePolicy contains default
policies for filesystem volumes.
Expand Down Expand Up @@ -8497,6 +8507,11 @@ spec:
description: CleanupThreads contains the maximum number of cleanup
threads(dd or blkdiscard) per init container.
type: integer
deleteLocalStorageOnRestart:
description: |-
DeleteLocalStorageOnRestart enables the deletion of local storage PVCs when a pod is restarted or rescheduled
by AKO. It only considers local storage classes given in the localStorageClasses field.
type: boolean
filesystemVolumePolicy:
description: FileSystemVolumePolicy contains default policies
for filesystem volumes.
Expand Down Expand Up @@ -15362,6 +15377,11 @@ spec:
description: CleanupThreads contains the maximum number
of cleanup threads(dd or blkdiscard) per init container.
type: integer
deleteLocalStorageOnRestart:
description: |-
DeleteLocalStorageOnRestart enables the deletion of local storage PVCs when a pod is restarted or rescheduled
by AKO. It only considers local storage classes given in the localStorageClasses field.
type: boolean
filesystemVolumePolicy:
description: FileSystemVolumePolicy contains default
policies for filesystem volumes.
Expand Down Expand Up @@ -16963,6 +16983,11 @@ spec:
description: CleanupThreads contains the maximum number
of cleanup threads(dd or blkdiscard) per init container.
type: integer
deleteLocalStorageOnRestart:
description: |-
DeleteLocalStorageOnRestart enables the deletion of local storage PVCs when a pod is restarted or rescheduled
by AKO. It only considers local storage classes given in the localStorageClasses field.
type: boolean
filesystemVolumePolicy:
description: FileSystemVolumePolicy contains default
policies for filesystem volumes.
Expand Down Expand Up @@ -17704,6 +17729,11 @@ spec:
description: CleanupThreads contains the maximum number of cleanup
threads(dd or blkdiscard) per init container.
type: integer
deleteLocalStorageOnRestart:
description: |-
DeleteLocalStorageOnRestart enables the deletion of local storage PVCs when a pod is restarted or rescheduled
by AKO. It only considers local storage classes given in the localStorageClasses field.
type: boolean
filesystemVolumePolicy:
description: FileSystemVolumePolicy contains default policies
for filesystem volumes.
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ require (
github.com/aerospike/aerospike-client-go/v8 v8.2.1
github.com/aerospike/aerospike-management-lib v1.7.1-0.20250519063642-57d55e3eddf8
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d
github.com/aws/smithy-go v1.22.3
github.com/deckarep/golang-set/v2 v2.3.1
github.com/evanphx/json-patch v4.12.0+incompatible
github.com/go-logr/logr v1.4.2
Expand All @@ -30,7 +31,6 @@ require (
require (
github.com/aerospike/backup-go v0.3.2-0.20250330113002-7fb1b5be7ffc // indirect
github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
github.com/aws/smithy-go v1.22.3 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6221,6 +6221,11 @@ spec:
description: CleanupThreads contains the maximum number
of cleanup threads(dd or blkdiscard) per init container.
type: integer
deleteLocalStorageOnRestart:
description: |-
DeleteLocalStorageOnRestart enables the deletion of local storage PVCs when a pod is restarted or rescheduled
by AKO. It only considers local storage classes given in the localStorageClasses field.
type: boolean
filesystemVolumePolicy:
description: FileSystemVolumePolicy contains default
policies for filesystem volumes.
Expand Down Expand Up @@ -7822,6 +7827,11 @@ spec:
description: CleanupThreads contains the maximum number
of cleanup threads(dd or blkdiscard) per init container.
type: integer
deleteLocalStorageOnRestart:
description: |-
DeleteLocalStorageOnRestart enables the deletion of local storage PVCs when a pod is restarted or rescheduled
by AKO. It only considers local storage classes given in the localStorageClasses field.
type: boolean
filesystemVolumePolicy:
description: FileSystemVolumePolicy contains default
policies for filesystem volumes.
Expand Down Expand Up @@ -8497,6 +8507,11 @@ spec:
description: CleanupThreads contains the maximum number of cleanup
threads(dd or blkdiscard) per init container.
type: integer
deleteLocalStorageOnRestart:
description: |-
DeleteLocalStorageOnRestart enables the deletion of local storage PVCs when a pod is restarted or rescheduled
by AKO. It only considers local storage classes given in the localStorageClasses field.
type: boolean
filesystemVolumePolicy:
description: FileSystemVolumePolicy contains default policies
for filesystem volumes.
Expand Down Expand Up @@ -15362,6 +15377,11 @@ spec:
description: CleanupThreads contains the maximum number
of cleanup threads(dd or blkdiscard) per init container.
type: integer
deleteLocalStorageOnRestart:
description: |-
DeleteLocalStorageOnRestart enables the deletion of local storage PVCs when a pod is restarted or rescheduled
by AKO. It only considers local storage classes given in the localStorageClasses field.
type: boolean
filesystemVolumePolicy:
description: FileSystemVolumePolicy contains default
policies for filesystem volumes.
Expand Down Expand Up @@ -16963,6 +16983,11 @@ spec:
description: CleanupThreads contains the maximum number
of cleanup threads(dd or blkdiscard) per init container.
type: integer
deleteLocalStorageOnRestart:
description: |-
DeleteLocalStorageOnRestart enables the deletion of local storage PVCs when a pod is restarted or rescheduled
by AKO. It only considers local storage classes given in the localStorageClasses field.
type: boolean
filesystemVolumePolicy:
description: FileSystemVolumePolicy contains default
policies for filesystem volumes.
Expand Down Expand Up @@ -17704,6 +17729,11 @@ spec:
description: CleanupThreads contains the maximum number of cleanup
threads(dd or blkdiscard) per init container.
type: integer
deleteLocalStorageOnRestart:
description: |-
DeleteLocalStorageOnRestart enables the deletion of local storage PVCs when a pod is restarted or rescheduled
by AKO. It only considers local storage classes given in the localStorageClasses field.
type: boolean
filesystemVolumePolicy:
description: FileSystemVolumePolicy contains default policies
for filesystem volumes.
Expand Down
29 changes: 18 additions & 11 deletions internal/controller/cluster/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,6 @@ func (r *SingleClusterReconciler) restartPods(
restartedPods := make([]*corev1.Pod, 0, len(podsToRestart))
restartedPodNames := make([]string, 0, len(podsToRestart))
restartedASDPodNames := make([]string, 0, len(podsToRestart))
blockedK8sNodes := sets.NewString(r.aeroCluster.Spec.K8sNodeBlockList...)

for idx := range podsToRestart {
pod := podsToRestart[idx]
Expand All @@ -379,10 +378,7 @@ func (r *SingleClusterReconciler) restartPods(

restartedASDPodNames = append(restartedASDPodNames, pod.Name)
} else if restartType == podRestart {
if blockedK8sNodes.Has(pod.Spec.NodeName) {
r.Log.Info("Pod found in blocked nodes list, deleting corresponding local PVCs if any",
"podName", pod.Name)

if r.isLocalPVCDeletionRequired(rackState, pod) {
if err := r.deleteLocalPVCs(rackState, pod); err != nil {
return common.ReconcileError(err)
}
Expand Down Expand Up @@ -561,14 +557,9 @@ func (r *SingleClusterReconciler) deletePodAndEnsureImageUpdated(
return common.ReconcileError(err)
}

blockedK8sNodes := sets.NewString(r.aeroCluster.Spec.K8sNodeBlockList...)

// Delete pods
for _, pod := range podsToUpdate {
if blockedK8sNodes.Has(pod.Spec.NodeName) {
r.Log.Info("Pod found in blocked nodes list, deleting corresponding local PVCs if any",
"podName", pod.Name)

if r.isLocalPVCDeletionRequired(rackState, pod) {
if err := r.deleteLocalPVCs(rackState, pod); err != nil {
return common.ReconcileError(err)
}
Expand All @@ -588,6 +579,22 @@ func (r *SingleClusterReconciler) deletePodAndEnsureImageUpdated(
return r.ensurePodsImageUpdated(podsToUpdate)
}

func (r *SingleClusterReconciler) isLocalPVCDeletionRequired(rackState *RackState, pod *corev1.Pod) bool {
if utils.ContainsString(r.aeroCluster.Spec.K8sNodeBlockList, pod.Spec.NodeName) {
r.Log.Info("Pod found in blocked nodes list, deleting corresponding local PVCs if any",
"podName", pod.Name)
return true
}

if asdbv1.GetBool(rackState.Rack.Storage.DeleteLocalStorageOnRestart) {
r.Log.Info("deleteLocalStorageOnRestart flag is enabled, deleting corresponding local PVCs if any",
"podName", pod.Name)
return true
}

return false
}

func (r *SingleClusterReconciler) ensurePodsImageUpdated(podsToCheck []*corev1.Pod) common.ReconcileResult {
podNames := getPodNames(podsToCheck)
updatedPods := sets.Set[string]{}
Expand Down
6 changes: 6 additions & 0 deletions internal/webhook/v1/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,12 @@ func getAerospikeStorageList(storage *asdbv1.AerospikeStorageSpec, onlyPV bool)
func validateStorage(
storage *asdbv1.AerospikeStorageSpec, podSpec *asdbv1.AerospikePodSpec,
) error {
if asdbv1.GetBool(storage.DeleteLocalStorageOnRestart) && len(storage.LocalStorageClasses) == 0 {
return fmt.Errorf(
"deleteLocalStorageOnRestart is set to true, but no local storage classes are defined",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Replace local storage classes with localStorageClasses
-> localStorageClasses cannot be empty if deleteLocalStorageOnRestart is set.

)
}

reservedPaths := map[string]int{
// Reserved mount paths for the operator.
"/etc/aerospike": 1,
Expand Down
30 changes: 16 additions & 14 deletions test/cluster/k8snode_block_list_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ var _ = Describe(
Expect(err).ToNot(HaveOccurred())

By("Verifying if the pod is migrated to other nodes and pod pvcs are not deleted")
validatePodAndPVCMigration(ctx, podName, oldK8sNode, oldPvcInfo, true)
validatePodAndPVCMigration(ctx, podName, oldK8sNode, oldPvcInfo, false)
},
)

Expand All @@ -123,7 +123,7 @@ var _ = Describe(
Expect(err).ToNot(HaveOccurred())

By("Verifying if the pod is migrated to other nodes and pod pvcs are not deleted")
validatePodAndPVCMigration(ctx, podName, oldK8sNode, oldPvcInfo, true)
validatePodAndPVCMigration(ctx, podName, oldK8sNode, oldPvcInfo, false)
},
)

Expand All @@ -139,7 +139,7 @@ var _ = Describe(
Expect(err).ToNot(HaveOccurred())

By("Verifying if the pod is migrated to other nodes and pod pvcs are not deleted")
validatePodAndPVCMigration(ctx, podName, oldK8sNode, oldPvcInfo, true)
validatePodAndPVCMigration(ctx, podName, oldK8sNode, oldPvcInfo, false)
},
)

Expand All @@ -155,7 +155,7 @@ var _ = Describe(
Expect(err).ToNot(HaveOccurred())

By("Verifying if the pod is migrated to other nodes and pod local pvcs are deleted")
validatePodAndPVCMigration(ctx, podName, oldK8sNode, oldPvcInfo, false)
validatePodAndPVCMigration(ctx, podName, oldK8sNode, oldPvcInfo, true)
},
)

Expand All @@ -181,7 +181,7 @@ var _ = Describe(
Expect(err).ToNot(HaveOccurred())

By("Verifying if the failed pod is migrated to other nodes and pod pvcs are not deleted")
validatePodAndPVCMigration(ctx, podName, oldK8sNode, oldPvcInfo, true)
validatePodAndPVCMigration(ctx, podName, oldK8sNode, oldPvcInfo, false)
},
)
},
Expand Down Expand Up @@ -210,7 +210,7 @@ func extractPodPVC(pod *corev1.Pod) (map[string]types.UID, error) {
return pvcUIDMap, nil
}

func validatePVCDeletion(ctx context.Context, pvcUIDMap map[string]types.UID, shouldDelete bool) error {
func validatePVCDeletion(ctx context.Context, pvcUIDMap map[string]types.UID, shouldDeletePVC bool) error {
pvc := &corev1.PersistentVolumeClaim{}

for pvcName, pvcUID := range pvcUIDMap {
Expand All @@ -222,25 +222,27 @@ func validatePVCDeletion(ctx context.Context, pvcUIDMap map[string]types.UID, sh
return err
}

if shouldDelete && pvc.UID != pvcUID {
return fmt.Errorf("PVC %s is unintentionally deleted", pvcName)
}

if !shouldDelete && pvc.UID == pvcUID {
return fmt.Errorf("PVC %s is not deleted", pvcName)
if shouldDeletePVC {
if pvc.UID == pvcUID {
return fmt.Errorf("PVC %s is not deleted", pvcName)
}
} else {
if pvc.UID != pvcUID {
return fmt.Errorf("PVC %s is unintentionally deleted", pvcName)
}
}
}

return nil
}

func validatePodAndPVCMigration(ctx context.Context, podName, oldK8sNode string,
oldPvcInfo map[string]types.UID, shouldDelete bool) {
oldPvcInfo map[string]types.UID, shouldDeletePVC bool) {
pod := &corev1.Pod{}
err := k8sClient.Get(ctx, test.GetNamespacedName(podName, namespace), pod)
Expect(err).ToNot(HaveOccurred())
Expect(pod.Spec.NodeName).ToNot(Equal(oldK8sNode))

err = validatePVCDeletion(ctx, oldPvcInfo, shouldDelete)
err = validatePVCDeletion(ctx, oldPvcInfo, shouldDeletePVC)
Expect(err).ToNot(HaveOccurred())
}
Loading
Loading