From b42097f56564d23c49d94c1a15fb474810e8517e Mon Sep 17 00:00:00 2001 From: Roelof Kuijpers Date: Wed, 9 Apr 2025 09:52:35 +0200 Subject: [PATCH 1/7] fix: stuck at 'Progressing' #15317 Signed-off-by: Roelof Kuijpers --- pkg/health/health_pod.go | 34 +++++--- ...-restart-never-with-ignore-annotation.yaml | 79 +++++++++++++++++++ 2 files changed, 103 insertions(+), 10 deletions(-) create mode 100644 pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml diff --git a/pkg/health/health_pod.go b/pkg/health/health_pod.go index 9ebcef558..c106a1f5e 100644 --- a/pkg/health/health_pod.go +++ b/pkg/health/health_pod.go @@ -12,6 +12,10 @@ import ( "github.com/argoproj/gitops-engine/pkg/utils/kube" ) +const ( + AnnotationIgnoreRestartPolicy = "argocd.argoproj.io/ignore-restart-policy" +) + func getPodHealth(obj *unstructured.Unstructured) (*HealthStatus, error) { gvk := obj.GroupVersionKind() switch gvk { @@ -93,9 +97,9 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) { } return &HealthStatus{Status: HealthStatusDegraded, Message: ""}, nil + case corev1.PodRunning: - switch pod.Spec.RestartPolicy { - case corev1.RestartPolicyAlways: + getHealthStatus := func(pod *corev1.Pod) (*HealthStatus, error) { // if pod is ready, it is automatically healthy if podutils.IsPodReady(pod) { return &HealthStatus{ @@ -117,14 +121,24 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) { Status: HealthStatusProgressing, Message: pod.Status.Message, }, nil - case corev1.RestartPolicyOnFailure, corev1.RestartPolicyNever: - // pods set with a restart policy of OnFailure or Never, have a finite life. - // These pods are typically resource hooks. Thus, we consider these as Progressing - // instead of healthy. - return &HealthStatus{ - Status: HealthStatusProgressing, - Message: pod.Status.Message, - }, nil + } + if _, hook := pod.Annotations[AnnotationIgnoreRestartPolicy]; hook { + return getHealthStatus(pod) + } else { + switch pod.Spec.RestartPolicy { + case corev1.RestartPolicyAlways: + return getHealthStatus(pod) + case corev1.RestartPolicyOnFailure, corev1.RestartPolicyNever: + // Most pods set with a restart policy of OnFailure or Never, have a finite life. + // These pods are typically resource hooks. Thus, we consider these as Progressing + // instead of healthy. If this is unwanted, e.g., when the pod is managed by an + // operator and therefore has a restart policy of OnFailure or Never, then use the + // the AnnotationIgnoreRestartPolicy annotation. + return &HealthStatus{ + Status: HealthStatusProgressing, + Message: pod.Status.Message, + }, nil + } } } return &HealthStatus{ diff --git a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml new file mode 100644 index 000000000..432473dbd --- /dev/null +++ b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml @@ -0,0 +1,79 @@ +apiVersion: v1 +kind: Pod +metadata: + creationTimestamp: 2018-12-02T09:15:16Z + name: my-pod + namespace: argocd + resourceVersion: "151053" + selfLink: /api/v1/namespaces/argocd/pods/my-pod + uid: c86e909c-f612-11e8-a057-fe5f49266390 + annotations: + argocd.argoproj.io/ignore-restart-policy: "true" +spec: + containers: + - command: + - sh + - -c + - sleep 10 + image: alpine:latest + imagePullPolicy: Always + name: main + resources: {} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /var/run/secrets/kubernetes.io/serviceaccount + name: default-token-f9jvj + readOnly: true + dnsPolicy: ClusterFirst + nodeName: minikube + restartPolicy: Never + schedulerName: default-scheduler + securityContext: {} + serviceAccount: default + serviceAccountName: default + terminationGracePeriodSeconds: 30 + tolerations: + - effect: NoExecute + key: node.kubernetes.io/not-ready + operator: Exists + tolerationSeconds: 300 + - effect: NoExecute + key: node.kubernetes.io/unreachable + operator: Exists + tolerationSeconds: 300 + volumes: + - name: default-token-f9jvj + secret: + defaultMode: 420 + secretName: default-token-f9jvj +status: + conditions: + - lastProbeTime: null + lastTransitionTime: 2018-12-02T09:15:16Z + status: "True" + type: Initialized + - lastProbeTime: null + lastTransitionTime: 2018-12-02T09:15:19Z + status: "True" + type: Ready + - lastProbeTime: null + lastTransitionTime: 2018-12-02T09:15:16Z + status: "True" + type: PodScheduled + containerStatuses: + - containerID: docker://acfb261d6c1fe8c543438a202de62cb06c137fa93a2d59262d764470e96f3195 + image: alpine:latest + imageID: docker-pullable://alpine@sha256:621c2f39f8133acb8e64023a94dbdf0d5ca81896102b9e57c0dc184cadaf5528 + lastState: {} + name: main + ready: true + restartCount: 0 + state: + running: + startedAt: 2018-12-02T09:15:19Z + hostIP: 192.168.64.41 + phase: Running + podIP: 172.17.0.9 + qosClass: BestEffort + startTime: 2018-12-02T09:15:16Z From f87b5c1ad1d4063d3b04ca97739c28688af7ff83 Mon Sep 17 00:00:00 2001 From: RoelofKuijpers Date: Wed, 9 Apr 2025 10:22:16 +0200 Subject: [PATCH 2/7] Update health_test.go Signed-off-by: Roelof Kuijpers --- pkg/health/health_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/health/health_test.go b/pkg/health/health_test.go index ef945eb46..20ddc5f5c 100644 --- a/pkg/health/health_test.go +++ b/pkg/health/health_test.go @@ -103,6 +103,7 @@ func TestPod(t *testing.T) { assertAppHealth(t, "./testdata/pod-error.yaml", HealthStatusDegraded) assertAppHealth(t, "./testdata/pod-running-restart-always.yaml", HealthStatusHealthy) assertAppHealth(t, "./testdata/pod-running-restart-never.yaml", HealthStatusProgressing) + assertAppHealth(t, "./testdata/pod-running-restart-never-with-ignore-annotation.yaml", HealthStatusHealthy) assertAppHealth(t, "./testdata/pod-running-restart-onfailure.yaml", HealthStatusProgressing) assertAppHealth(t, "./testdata/pod-failed.yaml", HealthStatusDegraded) assertAppHealth(t, "./testdata/pod-succeeded.yaml", HealthStatusHealthy) From 0b2fa72b4c9860891047244ffe55783bc71717b9 Mon Sep 17 00:00:00 2001 From: RoelofKuijpers Date: Wed, 9 Apr 2025 12:06:40 +0200 Subject: [PATCH 3/7] Update pod-running-restart-never-with-ignore-annotation.yaml so it passes the checks of the Quality Gate Signed-off-by: Roelof Kuijpers --- ...ng-restart-never-with-ignore-annotation.yaml | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml index 432473dbd..4c2b3682a 100644 --- a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml +++ b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml @@ -15,10 +15,16 @@ spec: - sh - -c - sleep 10 - image: alpine:latest + image: alpine:3.21 imagePullPolicy: Always name: main - resources: {} + resources: + requests: + memory: "128Mi" + cpu: "250m" + limits: + memory: "256Mi" + cpu: "500m" terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: @@ -32,6 +38,7 @@ spec: securityContext: {} serviceAccount: default serviceAccountName: default + automountServiceAccountToken: false terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute @@ -62,9 +69,9 @@ status: status: "True" type: PodScheduled containerStatuses: - - containerID: docker://acfb261d6c1fe8c543438a202de62cb06c137fa93a2d59262d764470e96f3195 - image: alpine:latest - imageID: docker-pullable://alpine@sha256:621c2f39f8133acb8e64023a94dbdf0d5ca81896102b9e57c0dc184cadaf5528 + - containerID: containerd://adc73c2c0ae3f1fd9bf294abd834e740042ee375de680c0cfcdd90d863a73b8b + image: alpine:3.21 + imageID: docker.io/library/alpine@sha256:a8560b36e8b8210634f77d9f7f9efd7ffa463e380b75e2e74aff4511df3ef88c lastState: {} name: main ready: true From 689583dd101e46fac21f6b1ec0e30dd2522da1b9 Mon Sep 17 00:00:00 2001 From: Roelof Kuijpers Date: Wed, 9 Apr 2025 13:57:34 +0200 Subject: [PATCH 4/7] add storage request Signed-off-by: Roelof Kuijpers --- .../pod-running-restart-never-with-ignore-annotation.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml index 4c2b3682a..1cc27e5f1 100644 --- a/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml +++ b/pkg/health/testdata/pod-running-restart-never-with-ignore-annotation.yaml @@ -20,6 +20,7 @@ spec: name: main resources: requests: + ephemeral-storage: "100Mi" memory: "128Mi" cpu: "250m" limits: From ab66dbd07085c1dca351db1c70807f5de7bf5054 Mon Sep 17 00:00:00 2001 From: RoelofKuijpers Date: Mon, 21 Apr 2025 18:13:20 +0200 Subject: [PATCH 5/7] Update pkg/health/health_pod.go improve code readability Co-authored-by: sivchari Signed-off-by: Roelof Kuijpers --- pkg/health/health_pod.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pkg/health/health_pod.go b/pkg/health/health_pod.go index c106a1f5e..7d874f284 100644 --- a/pkg/health/health_pod.go +++ b/pkg/health/health_pod.go @@ -122,13 +122,12 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) { Message: pod.Status.Message, }, nil } - if _, hook := pod.Annotations[AnnotationIgnoreRestartPolicy]; hook { + policy := pod.Spec.RestartPolicy + if _, ok := pod.Annotations[AnnotationIgnoreRestartPolicy]; ok || policy == corev1.RestartPolicyAlways { return getHealthStatus(pod) - } else { - switch pod.Spec.RestartPolicy { - case corev1.RestartPolicyAlways: - return getHealthStatus(pod) - case corev1.RestartPolicyOnFailure, corev1.RestartPolicyNever: + } + + if policy == corev1.RestartPolicyOnFailure || policy == corev1.RestartPolicyNever { // Most pods set with a restart policy of OnFailure or Never, have a finite life. // These pods are typically resource hooks. Thus, we consider these as Progressing // instead of healthy. If this is unwanted, e.g., when the pod is managed by an From 13e9b51c7423b5047405825e801e17a6348c86a8 Mon Sep 17 00:00:00 2001 From: Roelof Kuijpers Date: Mon, 21 Apr 2025 18:30:38 +0200 Subject: [PATCH 6/7] Improve code readability fix Signed-off-by: Roelof Kuijpers --- pkg/health/health_pod.go | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/pkg/health/health_pod.go b/pkg/health/health_pod.go index 7d874f284..d54c52cb0 100644 --- a/pkg/health/health_pod.go +++ b/pkg/health/health_pod.go @@ -122,22 +122,21 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) { Message: pod.Status.Message, }, nil } - policy := pod.Spec.RestartPolicy + policy := pod.Spec.RestartPolicy if _, ok := pod.Annotations[AnnotationIgnoreRestartPolicy]; ok || policy == corev1.RestartPolicyAlways { return getHealthStatus(pod) } - - if policy == corev1.RestartPolicyOnFailure || policy == corev1.RestartPolicyNever { - // Most pods set with a restart policy of OnFailure or Never, have a finite life. - // These pods are typically resource hooks. Thus, we consider these as Progressing - // instead of healthy. If this is unwanted, e.g., when the pod is managed by an - // operator and therefore has a restart policy of OnFailure or Never, then use the - // the AnnotationIgnoreRestartPolicy annotation. - return &HealthStatus{ - Status: HealthStatusProgressing, - Message: pod.Status.Message, - }, nil - } + + if policy == corev1.RestartPolicyOnFailure || policy == corev1.RestartPolicyNever { + // Most pods set with a restart policy of OnFailure or Never, have a finite life. + // These pods are typically resource hooks. Thus, we consider these as Progressing + // instead of healthy. If this is unwanted, e.g., when the pod is managed by an + // operator and therefore has a restart policy of OnFailure or Never, then use the + // the AnnotationIgnoreRestartPolicy annotation. + return &HealthStatus{ + Status: HealthStatusProgressing, + Message: pod.Status.Message, + }, nil } } return &HealthStatus{ From 8c321167bb4ba2507813bd9010d17e1b2a69d8f9 Mon Sep 17 00:00:00 2001 From: Roelof Kuijpers Date: Fri, 4 Jul 2025 00:47:56 +0200 Subject: [PATCH 7/7] Move annotation into common package Signed-off-by: Roelof Kuijpers --- pkg/health/health_pod.go | 7 ++----- pkg/sync/common/types.go | 3 +++ 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/health/health_pod.go b/pkg/health/health_pod.go index d54c52cb0..cd697d119 100644 --- a/pkg/health/health_pod.go +++ b/pkg/health/health_pod.go @@ -9,13 +9,10 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/kubectl/pkg/util/podutils" + "github.com/argoproj/gitops-engine/pkg/sync/common" "github.com/argoproj/gitops-engine/pkg/utils/kube" ) -const ( - AnnotationIgnoreRestartPolicy = "argocd.argoproj.io/ignore-restart-policy" -) - func getPodHealth(obj *unstructured.Unstructured) (*HealthStatus, error) { gvk := obj.GroupVersionKind() switch gvk { @@ -123,7 +120,7 @@ func getCorev1PodHealth(pod *corev1.Pod) (*HealthStatus, error) { }, nil } policy := pod.Spec.RestartPolicy - if _, ok := pod.Annotations[AnnotationIgnoreRestartPolicy]; ok || policy == corev1.RestartPolicyAlways { + if _, ok := pod.Annotations[common.AnnotationIgnoreRestartPolicy]; ok || policy == corev1.RestartPolicyAlways { return getHealthStatus(pod) } diff --git a/pkg/sync/common/types.go b/pkg/sync/common/types.go index 002bb23da..5981b9753 100644 --- a/pkg/sync/common/types.go +++ b/pkg/sync/common/types.go @@ -17,6 +17,9 @@ const ( // AnnotationKeyHookDeletePolicy is the policy of deleting a hook AnnotationKeyHookDeletePolicy = "argocd.argoproj.io/hook-delete-policy" AnnotationDeletionApproved = "argocd.argoproj.io/deletion-approved" + // AnnotationIgnoreRestartPolicy ignores restart policy, useful for operator-managed + // pods to be considered healthy + AnnotationIgnoreRestartPolicy = "argocd.argoproj.io/ignore-restart-policy" // Sync option that disables dry run in resource is missing in the cluster SyncOptionSkipDryRunOnMissingResource = "SkipDryRunOnMissingResource=true"