From cbbdd5a508fcb153a11079cf38a362f4f2a0026e Mon Sep 17 00:00:00 2001 From: Justin Miron Date: Thu, 22 May 2025 20:34:26 -0500 Subject: [PATCH 1/4] MachineHealthCheck supports checking Machine conditions MachineHealthCheck currently only allows checking Node conditions to validate if a machine is healthy. However, machine conditions capture conditions that do not exist on nodes, for example, control plane node conditions such as EtcdPodHealthy, SchedulerPodHealthy that can indicate if a control plane machine has been created correctly. Adding support for Machine conditions enables us to perform remediation during control plane upgrades. This PR introduces a new field as part of the MachineHealthCheckSpec: - `UnhealthyMachineConditions` This will mirror the behavior of `UnhealthyNodeConditions` but the MachineHealthCheck controller will instead check the machine conditions. --- api/core/v1beta1/machinehealthcheck_types.go | 36 +++++ api/core/v1beta1/zz_generated.conversion.go | 36 +++++ api/core/v1beta1/zz_generated.deepcopy.go | 21 +++ api/core/v1beta1/zz_generated.openapi.go | 55 +++++++- api/core/v1beta2/machine_types.go | 4 + api/core/v1beta2/machinehealthcheck_types.go | 36 +++++ api/core/v1beta2/v1beta1_condition_consts.go | 3 + api/core/v1beta2/zz_generated.deepcopy.go | 21 +++ api/core/v1beta2/zz_generated.openapi.go | 55 +++++++- .../cluster.x-k8s.io_machinehealthchecks.yaml | 132 ++++++++++++++++++ .../core/v1alpha3/machinehealthcheck_types.go | 36 +++++ .../core/v1alpha3/zz_generated.conversion.go | 36 +++++ .../core/v1alpha3/zz_generated.deepcopy.go | 21 +++ .../core/v1alpha4/machinehealthcheck_types.go | 36 +++++ .../core/v1alpha4/zz_generated.conversion.go | 36 +++++ .../core/v1alpha4/zz_generated.deepcopy.go | 21 +++ .../machinehealthcheck_targets.go | 44 +++++- .../machinehealthcheck_targets_test.go | 75 ++++++++++ 18 files changed, 701 insertions(+), 3 deletions(-) diff --git a/api/core/v1beta1/machinehealthcheck_types.go 
b/api/core/v1beta1/machinehealthcheck_types.go index 6ca33d4e2663..bcf6f9f694eb 100644 --- a/api/core/v1beta1/machinehealthcheck_types.go +++ b/api/core/v1beta1/machinehealthcheck_types.go @@ -69,6 +69,14 @@ type MachineHealthCheckSpec struct { // +kubebuilder:validation:MaxItems=100 UnhealthyConditions []UnhealthyCondition `json:"unhealthyConditions,omitempty"` + // unhealthyMachineConditions contains a list of the machine conditions that determine + // whether a node is considered unhealthy. The conditions are combined in a + // logical OR, i.e. if any of the conditions is met, the node is unhealthy. + // + // +optional + // +kubebuilder:validation:MaxItems=100 + UnhealthyMachineConditions []UnhealthyMachineCondition `json:"unhealthyMachineConditions,omitempty"` + // maxUnhealthy specifies the maximum number of unhealthy machines allowed. // Any further remediation is only allowed if at most "maxUnhealthy" machines selected by // "selector" are not healthy. @@ -148,6 +156,34 @@ type UnhealthyCondition struct { // ANCHOR_END: UnhealthyCondition +// ANCHOR: UnhealthyMachineCondition + +// UnhealthyMachineCondition represents a Node condition type and value with a timeout +// specified as a duration. When the named condition has been in the given +// status for at least the timeout value, a node is considered unhealthy. +type UnhealthyMachineCondition struct { + // type of Node condition + // +kubebuilder:validation:Type=string + // +kubebuilder:validation:MinLength=1 + // +required + Type string `json:"type"` + + // status of the condition, one of True, False, Unknown. + // +kubebuilder:validation:Type=string + // +kubebuilder:validation:MinLength=1 + // +required + Status metav1.ConditionStatus `json:"status"` + + // timeout is the duration that a node must be in a given status for, + // after which the node is considered unhealthy. + // For example, with a value of "1h", the node must match the status + // for at least 1 hour before being considered unhealthy. 
+ // +required + Timeout metav1.Duration `json:"timeout"` +} + +// ANCHOR_END: UnhealthyMachineCondition + // ANCHOR: MachineHealthCheckStatus // MachineHealthCheckStatus defines the observed state of MachineHealthCheck. diff --git a/api/core/v1beta1/zz_generated.conversion.go b/api/core/v1beta1/zz_generated.conversion.go index 6f7b7ee13770..25d49308755a 100644 --- a/api/core/v1beta1/zz_generated.conversion.go +++ b/api/core/v1beta1/zz_generated.conversion.go @@ -725,6 +725,16 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddGeneratedConversionFunc((*UnhealthyMachineCondition)(nil), (*v1beta2.UnhealthyMachineCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta1_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(a.(*UnhealthyMachineCondition), b.(*v1beta2.UnhealthyMachineCondition), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*v1beta2.UnhealthyMachineCondition)(nil), (*UnhealthyMachineCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta2_UnhealthyMachineCondition_To_v1beta1_UnhealthyMachineCondition(a.(*v1beta2.UnhealthyMachineCondition), b.(*UnhealthyMachineCondition), scope) + }); err != nil { + return err + } if err := s.AddGeneratedConversionFunc((*ValidationRule)(nil), (*v1beta2.ValidationRule)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1beta1_ValidationRule_To_v1beta2_ValidationRule(a.(*ValidationRule), b.(*v1beta2.ValidationRule), scope) }); err != nil { @@ -2747,6 +2757,7 @@ func autoConvert_v1beta1_MachineHealthCheckSpec_To_v1beta2_MachineHealthCheckSpe out.ClusterName = in.ClusterName out.Selector = in.Selector // WARNING: in.UnhealthyConditions requires manual conversion: does not exist in peer-type + out.UnhealthyMachineConditions = *(*[]v1beta2.UnhealthyMachineCondition)(unsafe.Pointer(&in.UnhealthyMachineConditions)) 
out.MaxUnhealthy = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnhealthy)) out.UnhealthyRange = (*string)(unsafe.Pointer(in.UnhealthyRange)) out.NodeStartupTimeout = (*v1.Duration)(unsafe.Pointer(in.NodeStartupTimeout)) @@ -2758,6 +2769,7 @@ func autoConvert_v1beta2_MachineHealthCheckSpec_To_v1beta1_MachineHealthCheckSpe out.ClusterName = in.ClusterName out.Selector = in.Selector // WARNING: in.UnhealthyNodeConditions requires manual conversion: does not exist in peer-type + out.UnhealthyMachineConditions = *(*[]UnhealthyMachineCondition)(unsafe.Pointer(&in.UnhealthyMachineConditions)) out.MaxUnhealthy = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnhealthy)) out.UnhealthyRange = (*string)(unsafe.Pointer(in.UnhealthyRange)) out.NodeStartupTimeout = (*v1.Duration)(unsafe.Pointer(in.NodeStartupTimeout)) @@ -3823,6 +3835,30 @@ func autoConvert_v1beta2_Topology_To_v1beta1_Topology(in *v1beta2.Topology, out return nil } +func autoConvert_v1beta1_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in *UnhealthyMachineCondition, out *v1beta2.UnhealthyMachineCondition, s conversion.Scope) error { + out.Type = in.Type + out.Status = v1.ConditionStatus(in.Status) + out.Timeout = in.Timeout + return nil +} + +// Convert_v1beta1_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition is an autogenerated conversion function. 
+func Convert_v1beta1_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in *UnhealthyMachineCondition, out *v1beta2.UnhealthyMachineCondition, s conversion.Scope) error { + return autoConvert_v1beta1_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in, out, s) +} + +func autoConvert_v1beta2_UnhealthyMachineCondition_To_v1beta1_UnhealthyMachineCondition(in *v1beta2.UnhealthyMachineCondition, out *UnhealthyMachineCondition, s conversion.Scope) error { + out.Type = in.Type + out.Status = v1.ConditionStatus(in.Status) + out.Timeout = in.Timeout + return nil +} + +// Convert_v1beta2_UnhealthyMachineCondition_To_v1beta1_UnhealthyMachineCondition is an autogenerated conversion function. +func Convert_v1beta2_UnhealthyMachineCondition_To_v1beta1_UnhealthyMachineCondition(in *v1beta2.UnhealthyMachineCondition, out *UnhealthyMachineCondition, s conversion.Scope) error { + return autoConvert_v1beta2_UnhealthyMachineCondition_To_v1beta1_UnhealthyMachineCondition(in, out, s) +} + func autoConvert_v1beta1_ValidationRule_To_v1beta2_ValidationRule(in *ValidationRule, out *v1beta2.ValidationRule, s conversion.Scope) error { out.Rule = in.Rule out.Message = in.Message diff --git a/api/core/v1beta1/zz_generated.deepcopy.go b/api/core/v1beta1/zz_generated.deepcopy.go index a1b090669cb4..f0cc5caea8c4 100644 --- a/api/core/v1beta1/zz_generated.deepcopy.go +++ b/api/core/v1beta1/zz_generated.deepcopy.go @@ -1838,6 +1838,11 @@ func (in *MachineHealthCheckSpec) DeepCopyInto(out *MachineHealthCheckSpec) { *out = make([]UnhealthyCondition, len(*in)) copy(*out, *in) } + if in.UnhealthyMachineConditions != nil { + in, out := &in.UnhealthyMachineConditions, &out.UnhealthyMachineConditions + *out = make([]UnhealthyMachineCondition, len(*in)) + copy(*out, *in) + } if in.MaxUnhealthy != nil { in, out := &in.MaxUnhealthy, &out.MaxUnhealthy *out = new(intstr.IntOrString) @@ -2905,6 +2910,22 @@ func (in *UnhealthyCondition) DeepCopy() *UnhealthyCondition { return out } +// 
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *UnhealthyMachineCondition) DeepCopyInto(out *UnhealthyMachineCondition) { + *out = *in + out.Timeout = in.Timeout +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UnhealthyMachineCondition. +func (in *UnhealthyMachineCondition) DeepCopy() *UnhealthyMachineCondition { + if in == nil { + return nil + } + out := new(UnhealthyMachineCondition) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ValidationRule) DeepCopyInto(out *ValidationRule) { *out = *in diff --git a/api/core/v1beta1/zz_generated.openapi.go b/api/core/v1beta1/zz_generated.openapi.go index 13a78b23719c..5ed4ae360e7f 100644 --- a/api/core/v1beta1/zz_generated.openapi.go +++ b/api/core/v1beta1/zz_generated.openapi.go @@ -121,6 +121,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "sigs.k8s.io/cluster-api/api/core/v1beta1.RemediationStrategy": schema_cluster_api_api_core_v1beta1_RemediationStrategy(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.Topology": schema_cluster_api_api_core_v1beta1_Topology(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.UnhealthyCondition": schema_cluster_api_api_core_v1beta1_UnhealthyCondition(ref), + "sigs.k8s.io/cluster-api/api/core/v1beta1.UnhealthyMachineCondition": schema_cluster_api_api_core_v1beta1_UnhealthyMachineCondition(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.ValidationRule": schema_cluster_api_api_core_v1beta1_ValidationRule(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.VariableSchema": schema_cluster_api_api_core_v1beta1_VariableSchema(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.VariableSchemaMetadata": schema_cluster_api_api_core_v1beta1_VariableSchemaMetadata(ref), @@ -3181,6 +3182,20 @@ func 
schema_cluster_api_api_core_v1beta1_MachineHealthCheckSpec(ref common.Refer }, }, }, + "unhealthyMachineConditions": { + SchemaProps: spec.SchemaProps{ + Description: "unhealthyMachineConditions contains a list of the machine conditions that determine whether a node is considered unhealthy. The conditions are combined in a logical OR, i.e. if any of the conditions is met, the node is unhealthy.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("sigs.k8s.io/cluster-api/api/core/v1beta1.UnhealthyMachineCondition"), + }, + }, + }, + }, + }, "maxUnhealthy": { SchemaProps: spec.SchemaProps{ Description: "maxUnhealthy specifies the maximum number of unhealthy machines allowed. Any further remediation is only allowed if at most \"maxUnhealthy\" machines selected by \"selector\" are not healthy.\n\nDeprecated: This field is deprecated and is going to be removed in the next apiVersion. 
Please see https://github.com/kubernetes-sigs/cluster-api/issues/10722 for more details.", @@ -3211,7 +3226,7 @@ func schema_cluster_api_api_core_v1beta1_MachineHealthCheckSpec(ref common.Refer }, }, Dependencies: []string{ - "k8s.io/api/core/v1.ObjectReference", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector", "k8s.io/apimachinery/pkg/util/intstr.IntOrString", "sigs.k8s.io/cluster-api/api/core/v1beta1.UnhealthyCondition"}, + "k8s.io/api/core/v1.ObjectReference", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector", "k8s.io/apimachinery/pkg/util/intstr.IntOrString", "sigs.k8s.io/cluster-api/api/core/v1beta1.UnhealthyCondition", "sigs.k8s.io/cluster-api/api/core/v1beta1.UnhealthyMachineCondition"}, } } @@ -5140,6 +5155,44 @@ func schema_cluster_api_api_core_v1beta1_UnhealthyCondition(ref common.Reference } } +func schema_cluster_api_api_core_v1beta1_UnhealthyMachineCondition(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "UnhealthyMachineCondition represents a Node condition type and value with a timeout specified as a duration. When the named condition has been in the given status for at least the timeout value, a node is considered unhealthy.", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "type": { + SchemaProps: spec.SchemaProps{ + Description: "type of Node condition", + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "status": { + SchemaProps: spec.SchemaProps{ + Description: "status of the condition, one of True, False, Unknown.", + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "timeout": { + SchemaProps: spec.SchemaProps{ + Description: "timeout is the duration that a node must be in a given status for, after which the node is considered unhealthy. 
For example, with a value of \"1h\", the node must match the status for at least 1 hour before being considered unhealthy.", + Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), + }, + }, + }, + Required: []string{"type", "status", "timeout"}, + }, + }, + Dependencies: []string{ + "k8s.io/apimachinery/pkg/apis/meta/v1.Duration"}, + } +} + func schema_cluster_api_api_core_v1beta1_ValidationRule(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ diff --git a/api/core/v1beta2/machine_types.go b/api/core/v1beta2/machine_types.go index d6830324ce5e..7854f64b280a 100644 --- a/api/core/v1beta2/machine_types.go +++ b/api/core/v1beta2/machine_types.go @@ -276,6 +276,10 @@ const ( // defined by a MachineHealthCheck object. MachineHealthCheckUnhealthyNodeReason = "UnhealthyNode" + // MachineHealthCheckUnhealthyMachineReason surfaces when the machine does not pass the health checks + // defined by a MachineHealthCheck object. + MachineHealthCheckUnhealthyMachineReason = "UnhealthyMachine" + // MachineHealthCheckNodeStartupTimeoutReason surfaces when the node hosted on the machine does not appear within // the timeout defined by a MachineHealthCheck object. MachineHealthCheckNodeStartupTimeoutReason = "NodeStartupTimeout" diff --git a/api/core/v1beta2/machinehealthcheck_types.go b/api/core/v1beta2/machinehealthcheck_types.go index 606075cc0749..556c7fd862a3 100644 --- a/api/core/v1beta2/machinehealthcheck_types.go +++ b/api/core/v1beta2/machinehealthcheck_types.go @@ -69,6 +69,14 @@ type MachineHealthCheckSpec struct { // +kubebuilder:validation:MaxItems=100 UnhealthyNodeConditions []UnhealthyNodeCondition `json:"unhealthyNodeConditions,omitempty"` + // unhealthyMachineConditions contains a list of the machine conditions that determine + // whether a node is considered unhealthy. The conditions are combined in a + // logical OR, i.e. if any of the conditions is met, the node is unhealthy. 
+ // + // +optional + // +kubebuilder:validation:MaxItems=100 + UnhealthyMachineConditions []UnhealthyMachineCondition `json:"unhealthyMachineConditions,omitempty"` + // maxUnhealthy specifies the maximum number of unhealthy machines allowed. // Any further remediation is only allowed if at most "maxUnhealthy" machines selected by // "selector" are not healthy. @@ -148,6 +156,34 @@ type UnhealthyNodeCondition struct { // ANCHOR_END: UnhealthyNodeCondition +// ANCHOR: UnhealthyMachineCondition + +// UnhealthyMachineCondition represents a Node condition type and value with a timeout +// specified as a duration. When the named condition has been in the given +// status for at least the timeout value, a node is considered unhealthy. +type UnhealthyMachineCondition struct { + // type of Node condition + // +kubebuilder:validation:Type=string + // +kubebuilder:validation:MinLength=1 + // +required + Type string `json:"type"` + + // status of the condition, one of True, False, Unknown. + // +kubebuilder:validation:Type=string + // +kubebuilder:validation:MinLength=1 + // +required + Status metav1.ConditionStatus `json:"status"` + + // timeout is the duration that a node must be in a given status for, + // after which the node is considered unhealthy. + // For example, with a value of "1h", the node must match the status + // for at least 1 hour before being considered unhealthy. + // +required + Timeout metav1.Duration `json:"timeout"` +} + +// ANCHOR_END: UnhealthyMachineCondition + // ANCHOR: MachineHealthCheckStatus // MachineHealthCheckStatus defines the observed state of MachineHealthCheck. 
diff --git a/api/core/v1beta2/v1beta1_condition_consts.go b/api/core/v1beta2/v1beta1_condition_consts.go index b6445dc0b5fe..2dafd2a9665c 100644 --- a/api/core/v1beta2/v1beta1_condition_consts.go +++ b/api/core/v1beta2/v1beta1_condition_consts.go @@ -161,6 +161,9 @@ const ( // UnhealthyNodeConditionV1Beta1Reason is the reason used when a machine's node has one of the MachineHealthCheck's unhealthy conditions. UnhealthyNodeConditionV1Beta1Reason = "UnhealthyNode" + + // UnhealthyMachineConditionV1Beta1Reason is the reason used when a machine has one of the MachineHealthCheck's unhealthy conditions. + UnhealthyMachineConditionV1Beta1Reason = "UnhealthyMachine" ) const ( diff --git a/api/core/v1beta2/zz_generated.deepcopy.go b/api/core/v1beta2/zz_generated.deepcopy.go index 653fd309eb6b..0772d1d8a490 100644 --- a/api/core/v1beta2/zz_generated.deepcopy.go +++ b/api/core/v1beta2/zz_generated.deepcopy.go @@ -1984,6 +1984,11 @@ func (in *MachineHealthCheckSpec) DeepCopyInto(out *MachineHealthCheckSpec) { *out = make([]UnhealthyNodeCondition, len(*in)) copy(*out, *in) } + if in.UnhealthyMachineConditions != nil { + in, out := &in.UnhealthyMachineConditions, &out.UnhealthyMachineConditions + *out = make([]UnhealthyMachineCondition, len(*in)) + copy(*out, *in) + } if in.MaxUnhealthy != nil { in, out := &in.MaxUnhealthy, &out.MaxUnhealthy *out = new(intstr.IntOrString) @@ -3122,6 +3127,22 @@ func (in *Topology) DeepCopy() *Topology { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *UnhealthyMachineCondition) DeepCopyInto(out *UnhealthyMachineCondition) { + *out = *in + out.Timeout = in.Timeout +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UnhealthyMachineCondition. 
+func (in *UnhealthyMachineCondition) DeepCopy() *UnhealthyMachineCondition { + if in == nil { + return nil + } + out := new(UnhealthyMachineCondition) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *UnhealthyNodeCondition) DeepCopyInto(out *UnhealthyNodeCondition) { *out = *in diff --git a/api/core/v1beta2/zz_generated.openapi.go b/api/core/v1beta2/zz_generated.openapi.go index c83e5b6f4029..f1c11bb5f4e3 100644 --- a/api/core/v1beta2/zz_generated.openapi.go +++ b/api/core/v1beta2/zz_generated.openapi.go @@ -132,6 +132,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "sigs.k8s.io/cluster-api/api/core/v1beta2.PatchSelectorMatchMachinePoolClass": schema_cluster_api_api_core_v1beta2_PatchSelectorMatchMachinePoolClass(ref), "sigs.k8s.io/cluster-api/api/core/v1beta2.RemediationStrategy": schema_cluster_api_api_core_v1beta2_RemediationStrategy(ref), "sigs.k8s.io/cluster-api/api/core/v1beta2.Topology": schema_cluster_api_api_core_v1beta2_Topology(ref), + "sigs.k8s.io/cluster-api/api/core/v1beta2.UnhealthyMachineCondition": schema_cluster_api_api_core_v1beta2_UnhealthyMachineCondition(ref), "sigs.k8s.io/cluster-api/api/core/v1beta2.UnhealthyNodeCondition": schema_cluster_api_api_core_v1beta2_UnhealthyNodeCondition(ref), "sigs.k8s.io/cluster-api/api/core/v1beta2.ValidationRule": schema_cluster_api_api_core_v1beta2_ValidationRule(ref), "sigs.k8s.io/cluster-api/api/core/v1beta2.VariableSchema": schema_cluster_api_api_core_v1beta2_VariableSchema(ref), @@ -3346,6 +3347,20 @@ func schema_cluster_api_api_core_v1beta2_MachineHealthCheckSpec(ref common.Refer }, }, }, + "unhealthyMachineConditions": { + SchemaProps: spec.SchemaProps{ + Description: "unhealthyMachineConditions contains a list of the machine conditions that determine whether a node is considered unhealthy. The conditions are combined in a logical OR, i.e. 
if any of the conditions is met, the node is unhealthy.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("sigs.k8s.io/cluster-api/api/core/v1beta2.UnhealthyMachineCondition"), + }, + }, + }, + }, + }, "maxUnhealthy": { SchemaProps: spec.SchemaProps{ Description: "maxUnhealthy specifies the maximum number of unhealthy machines allowed. Any further remediation is only allowed if at most \"maxUnhealthy\" machines selected by \"selector\" are not healthy.\n\nDeprecated: This field is deprecated and is going to be removed in the next apiVersion. Please see https://github.com/kubernetes-sigs/cluster-api/issues/10722 for more details.", @@ -3376,7 +3391,7 @@ func schema_cluster_api_api_core_v1beta2_MachineHealthCheckSpec(ref common.Refer }, }, Dependencies: []string{ - "k8s.io/api/core/v1.ObjectReference", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector", "k8s.io/apimachinery/pkg/util/intstr.IntOrString", "sigs.k8s.io/cluster-api/api/core/v1beta2.UnhealthyNodeCondition"}, + "k8s.io/api/core/v1.ObjectReference", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector", "k8s.io/apimachinery/pkg/util/intstr.IntOrString", "sigs.k8s.io/cluster-api/api/core/v1beta2.UnhealthyMachineCondition", "sigs.k8s.io/cluster-api/api/core/v1beta2.UnhealthyNodeCondition"}, } } @@ -5324,6 +5339,44 @@ func schema_cluster_api_api_core_v1beta2_Topology(ref common.ReferenceCallback) } } +func schema_cluster_api_api_core_v1beta2_UnhealthyMachineCondition(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "UnhealthyMachineCondition represents a Node condition type and value with a timeout specified as a duration. 
When the named condition has been in the given status for at least the timeout value, a node is considered unhealthy.", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "type": { + SchemaProps: spec.SchemaProps{ + Description: "type of Node condition", + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "status": { + SchemaProps: spec.SchemaProps{ + Description: "status of the condition, one of True, False, Unknown.", + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + "timeout": { + SchemaProps: spec.SchemaProps{ + Description: "timeout is the duration that a node must be in a given status for, after which the node is considered unhealthy. For example, with a value of \"1h\", the node must match the status for at least 1 hour before being considered unhealthy.", + Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), + }, + }, + }, + Required: []string{"type", "status", "timeout"}, + }, + }, + Dependencies: []string{ + "k8s.io/apimachinery/pkg/apis/meta/v1.Duration"}, + } +} + func schema_cluster_api_api_core_v1beta2_UnhealthyNodeCondition(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ diff --git a/config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml b/config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml index 64318f554237..78e1b0a734fb 100644 --- a/config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml +++ b/config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml @@ -209,6 +209,39 @@ spec: type: object minItems: 1 type: array + unhealthyMachineConditions: + description: |- + unhealthyMachineConditions contains a list of the machine conditions that determine + whether a node is considered unhealthy. The conditions are combined in a + logical OR, i.e. if any of the conditions is met, the node is unhealthy. 
+ items: + description: |- + UnhealthyMachineCondition represents a Node condition type and value with a timeout + specified as a duration. When the named condition has been in the given + status for at least the timeout value, a node is considered unhealthy. + properties: + status: + description: status of the condition, one of True, False, Unknown. + minLength: 1 + type: string + timeout: + description: |- + timeout is the duration that a node must be in a given status for, + after which the node is considered unhealthy. + For example, with a value of "1h", the node must match the status + for at least 1 hour before being considered unhealthy. + type: string + type: + description: type of Node condition + minLength: 1 + type: string + required: + - status + - timeout + - type + type: object + maxItems: 100 + type: array required: - clusterName - selector @@ -498,6 +531,39 @@ spec: type: object minItems: 1 type: array + unhealthyMachineConditions: + description: |- + unhealthyMachineConditions contains a list of the machine conditions that determine + whether a node is considered unhealthy. The conditions are combined in a + logical OR, i.e. if any of the conditions is met, the node is unhealthy. + items: + description: |- + UnhealthyMachineCondition represents a Node condition type and value with a timeout + specified as a duration. When the named condition has been in the given + status for at least the timeout value, a node is considered unhealthy. + properties: + status: + description: status of the condition, one of True, False, Unknown. + minLength: 1 + type: string + timeout: + description: |- + timeout is the duration that a node must be in a given status for, + after which the node is considered unhealthy. + For example, with a value of "1h", the node must match the status + for at least 1 hour before being considered unhealthy. 
+ type: string + type: + description: type of Node condition + minLength: 1 + type: string + required: + - status + - timeout + - type + type: object + maxItems: 100 + type: array unhealthyRange: description: |- unhealthyRange specifies the range of unhealthy machines allowed. @@ -806,6 +872,39 @@ spec: type: object maxItems: 100 type: array + unhealthyMachineConditions: + description: |- + unhealthyMachineConditions contains a list of the machine conditions that determine + whether a node is considered unhealthy. The conditions are combined in a + logical OR, i.e. if any of the conditions is met, the node is unhealthy. + items: + description: |- + UnhealthyMachineCondition represents a Node condition type and value with a timeout + specified as a duration. When the named condition has been in the given + status for at least the timeout value, a node is considered unhealthy. + properties: + status: + description: status of the condition, one of True, False, Unknown. + minLength: 1 + type: string + timeout: + description: |- + timeout is the duration that a node must be in a given status for, + after which the node is considered unhealthy. + For example, with a value of "1h", the node must match the status + for at least 1 hour before being considered unhealthy. + type: string + type: + description: type of Node condition + minLength: 1 + type: string + required: + - status + - timeout + - type + type: object + maxItems: 100 + type: array unhealthyRange: description: |- unhealthyRange specifies the range of unhealthy machines allowed. @@ -1163,6 +1262,39 @@ spec: type: object type: object x-kubernetes-map-type: atomic + unhealthyMachineConditions: + description: |- + unhealthyMachineConditions contains a list of the machine conditions that determine + whether a node is considered unhealthy. The conditions are combined in a + logical OR, i.e. if any of the conditions is met, the node is unhealthy. 
+ items: + description: |- + UnhealthyMachineCondition represents a Node condition type and value with a timeout + specified as a duration. When the named condition has been in the given + status for at least the timeout value, a node is considered unhealthy. + properties: + status: + description: status of the condition, one of True, False, Unknown. + minLength: 1 + type: string + timeout: + description: |- + timeout is the duration that a node must be in a given status for, + after which the node is considered unhealthy. + For example, with a value of "1h", the node must match the status + for at least 1 hour before being considered unhealthy. + type: string + type: + description: type of Node condition + minLength: 1 + type: string + required: + - status + - timeout + - type + type: object + maxItems: 100 + type: array unhealthyNodeConditions: description: |- unhealthyNodeConditions contains a list of conditions that determine diff --git a/internal/apis/core/v1alpha3/machinehealthcheck_types.go b/internal/apis/core/v1alpha3/machinehealthcheck_types.go index ee583d6a68b9..d4b6a63e06d6 100644 --- a/internal/apis/core/v1alpha3/machinehealthcheck_types.go +++ b/internal/apis/core/v1alpha3/machinehealthcheck_types.go @@ -40,6 +40,14 @@ type MachineHealthCheckSpec struct { // +kubebuilder:validation:MinItems=1 UnhealthyConditions []UnhealthyCondition `json:"unhealthyConditions"` + // unhealthyMachineConditions contains a list of the machine conditions that determine + // whether a node is considered unhealthy. The conditions are combined in a + // logical OR, i.e. if any of the conditions is met, the node is unhealthy. + // + // +optional + // +kubebuilder:validation:MaxItems=100 + UnhealthyMachineConditions []UnhealthyMachineCondition `json:"unhealthyMachineConditions,omitempty"` + // maxUnhealthy specifies the maximum number of unhealthy machines allowed. 
// Any further remediation is only allowed if at most "maxUnhealthy" machines selected by // "selector" are not healthy. @@ -88,6 +96,34 @@ type UnhealthyCondition struct { // ANCHOR_END: UnhealthyCondition +// ANCHOR: UnhealthyMachineCondition + +// UnhealthyMachineCondition represents a Node condition type and value with a timeout +// specified as a duration. When the named condition has been in the given +// status for at least the timeout value, a node is considered unhealthy. +type UnhealthyMachineCondition struct { + // type of Node condition + // +kubebuilder:validation:Type=string + // +kubebuilder:validation:MinLength=1 + // +required + Type string `json:"type"` + + // status of the condition, one of True, False, Unknown. + // +kubebuilder:validation:Type=string + // +kubebuilder:validation:MinLength=1 + // +required + Status metav1.ConditionStatus `json:"status"` + + // timeout is the duration that a node must be in a given status for, + // after which the node is considered unhealthy. + // For example, with a value of "1h", the node must match the status + // for at least 1 hour before being considered unhealthy. + // +required + Timeout metav1.Duration `json:"timeout"` +} + +// ANCHOR_END: UnhealthyMachineCondition + // ANCHOR: MachineHealthCheckStatus // MachineHealthCheckStatus defines the observed state of MachineHealthCheck. 
diff --git a/internal/apis/core/v1alpha3/zz_generated.conversion.go b/internal/apis/core/v1alpha3/zz_generated.conversion.go index 33e8606cb481..f2d90844fa30 100644 --- a/internal/apis/core/v1alpha3/zz_generated.conversion.go +++ b/internal/apis/core/v1alpha3/zz_generated.conversion.go @@ -259,6 +259,16 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddGeneratedConversionFunc((*UnhealthyMachineCondition)(nil), (*v1beta2.UnhealthyMachineCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha3_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(a.(*UnhealthyMachineCondition), b.(*v1beta2.UnhealthyMachineCondition), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*v1beta2.UnhealthyMachineCondition)(nil), (*UnhealthyMachineCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta2_UnhealthyMachineCondition_To_v1alpha3_UnhealthyMachineCondition(a.(*v1beta2.UnhealthyMachineCondition), b.(*UnhealthyMachineCondition), scope) + }); err != nil { + return err + } if err := s.AddConversionFunc((*v1.Condition)(nil), (*Condition)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1_Condition_To_v1alpha3_Condition(a.(*v1.Condition), b.(*Condition), scope) }); err != nil { @@ -991,6 +1001,7 @@ func autoConvert_v1alpha3_MachineHealthCheckSpec_To_v1beta2_MachineHealthCheckSp out.ClusterName = in.ClusterName out.Selector = in.Selector // WARNING: in.UnhealthyConditions requires manual conversion: does not exist in peer-type + out.UnhealthyMachineConditions = *(*[]v1beta2.UnhealthyMachineCondition)(unsafe.Pointer(&in.UnhealthyMachineConditions)) out.MaxUnhealthy = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnhealthy)) out.NodeStartupTimeout = (*v1.Duration)(unsafe.Pointer(in.NodeStartupTimeout)) out.RemediationTemplate = 
(*corev1.ObjectReference)(unsafe.Pointer(in.RemediationTemplate)) @@ -1001,6 +1012,7 @@ func autoConvert_v1beta2_MachineHealthCheckSpec_To_v1alpha3_MachineHealthCheckSp out.ClusterName = in.ClusterName out.Selector = in.Selector // WARNING: in.UnhealthyNodeConditions requires manual conversion: does not exist in peer-type + out.UnhealthyMachineConditions = *(*[]UnhealthyMachineCondition)(unsafe.Pointer(&in.UnhealthyMachineConditions)) out.MaxUnhealthy = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnhealthy)) // WARNING: in.UnhealthyRange requires manual conversion: does not exist in peer-type out.NodeStartupTimeout = (*v1.Duration)(unsafe.Pointer(in.NodeStartupTimeout)) @@ -1558,3 +1570,27 @@ func autoConvert_v1beta2_ObjectMeta_To_v1alpha3_ObjectMeta(in *v1beta2.ObjectMet func Convert_v1beta2_ObjectMeta_To_v1alpha3_ObjectMeta(in *v1beta2.ObjectMeta, out *ObjectMeta, s conversion.Scope) error { return autoConvert_v1beta2_ObjectMeta_To_v1alpha3_ObjectMeta(in, out, s) } + +func autoConvert_v1alpha3_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in *UnhealthyMachineCondition, out *v1beta2.UnhealthyMachineCondition, s conversion.Scope) error { + out.Type = in.Type + out.Status = v1.ConditionStatus(in.Status) + out.Timeout = in.Timeout + return nil +} + +// Convert_v1alpha3_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition is an autogenerated conversion function. 
+func Convert_v1alpha3_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in *UnhealthyMachineCondition, out *v1beta2.UnhealthyMachineCondition, s conversion.Scope) error { + return autoConvert_v1alpha3_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in, out, s) +} + +func autoConvert_v1beta2_UnhealthyMachineCondition_To_v1alpha3_UnhealthyMachineCondition(in *v1beta2.UnhealthyMachineCondition, out *UnhealthyMachineCondition, s conversion.Scope) error { + out.Type = in.Type + out.Status = v1.ConditionStatus(in.Status) + out.Timeout = in.Timeout + return nil +} + +// Convert_v1beta2_UnhealthyMachineCondition_To_v1alpha3_UnhealthyMachineCondition is an autogenerated conversion function. +func Convert_v1beta2_UnhealthyMachineCondition_To_v1alpha3_UnhealthyMachineCondition(in *v1beta2.UnhealthyMachineCondition, out *UnhealthyMachineCondition, s conversion.Scope) error { + return autoConvert_v1beta2_UnhealthyMachineCondition_To_v1alpha3_UnhealthyMachineCondition(in, out, s) +} diff --git a/internal/apis/core/v1alpha3/zz_generated.deepcopy.go b/internal/apis/core/v1alpha3/zz_generated.deepcopy.go index ac0ffaa9f4c5..e5744e3542bc 100644 --- a/internal/apis/core/v1alpha3/zz_generated.deepcopy.go +++ b/internal/apis/core/v1alpha3/zz_generated.deepcopy.go @@ -577,6 +577,11 @@ func (in *MachineHealthCheckSpec) DeepCopyInto(out *MachineHealthCheckSpec) { *out = make([]UnhealthyCondition, len(*in)) copy(*out, *in) } + if in.UnhealthyMachineConditions != nil { + in, out := &in.UnhealthyMachineConditions, &out.UnhealthyMachineConditions + *out = make([]UnhealthyMachineCondition, len(*in)) + copy(*out, *in) + } if in.MaxUnhealthy != nil { in, out := &in.MaxUnhealthy, &out.MaxUnhealthy *out = new(intstr.IntOrString) @@ -1107,3 +1112,19 @@ func (in *UnhealthyCondition) DeepCopy() *UnhealthyCondition { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *UnhealthyMachineCondition) DeepCopyInto(out *UnhealthyMachineCondition) { + *out = *in + out.Timeout = in.Timeout +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UnhealthyMachineCondition. +func (in *UnhealthyMachineCondition) DeepCopy() *UnhealthyMachineCondition { + if in == nil { + return nil + } + out := new(UnhealthyMachineCondition) + in.DeepCopyInto(out) + return out +} diff --git a/internal/apis/core/v1alpha4/machinehealthcheck_types.go b/internal/apis/core/v1alpha4/machinehealthcheck_types.go index 955de61446d1..eaba479c67b7 100644 --- a/internal/apis/core/v1alpha4/machinehealthcheck_types.go +++ b/internal/apis/core/v1alpha4/machinehealthcheck_types.go @@ -40,6 +40,14 @@ type MachineHealthCheckSpec struct { // +kubebuilder:validation:MinItems=1 UnhealthyConditions []UnhealthyCondition `json:"unhealthyConditions"` + // unhealthyMachineConditions contains a list of the machine conditions that determine + // whether a node is considered unhealthy. The conditions are combined in a + // logical OR, i.e. if any of the conditions is met, the node is unhealthy. + // + // +optional + // +kubebuilder:validation:MaxItems=100 + UnhealthyMachineConditions []UnhealthyMachineCondition `json:"unhealthyMachineConditions,omitempty"` + // maxUnhealthy specifies the maximum number of unhealthy machines allowed. // Any further remediation is only allowed if at most "maxUnhealthy" machines selected by // "selector" are not healthy. @@ -100,6 +108,34 @@ type UnhealthyCondition struct { // ANCHOR_END: UnhealthyCondition +// ANCHOR: UnhealthyMachineCondition + +// UnhealthyMachineCondition represents a Node condition type and value with a timeout +// specified as a duration. When the named condition has been in the given +// status for at least the timeout value, a node is considered unhealthy. 
+type UnhealthyMachineCondition struct { + // type of Node condition + // +kubebuilder:validation:Type=string + // +kubebuilder:validation:MinLength=1 + // +required + Type string `json:"type"` + + // status of the condition, one of True, False, Unknown. + // +kubebuilder:validation:Type=string + // +kubebuilder:validation:MinLength=1 + // +required + Status metav1.ConditionStatus `json:"status"` + + // timeout is the duration that a node must be in a given status for, + // after which the node is considered unhealthy. + // For example, with a value of "1h", the node must match the status + // for at least 1 hour before being considered unhealthy. + // +required + Timeout metav1.Duration `json:"timeout"` +} + +// ANCHOR_END: UnhealthyMachineCondition + // ANCHOR: MachineHealthCheckStatus // MachineHealthCheckStatus defines the observed state of MachineHealthCheck. diff --git a/internal/apis/core/v1alpha4/zz_generated.conversion.go b/internal/apis/core/v1alpha4/zz_generated.conversion.go index 4dd554472564..f20879ee18b2 100644 --- a/internal/apis/core/v1alpha4/zz_generated.conversion.go +++ b/internal/apis/core/v1alpha4/zz_generated.conversion.go @@ -344,6 +344,16 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddGeneratedConversionFunc((*UnhealthyMachineCondition)(nil), (*v1beta2.UnhealthyMachineCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha4_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(a.(*UnhealthyMachineCondition), b.(*v1beta2.UnhealthyMachineCondition), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*v1beta2.UnhealthyMachineCondition)(nil), (*UnhealthyMachineCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1beta2_UnhealthyMachineCondition_To_v1alpha4_UnhealthyMachineCondition(a.(*v1beta2.UnhealthyMachineCondition), b.(*UnhealthyMachineCondition), scope) + }); err != 
nil { + return err + } if err := s.AddGeneratedConversionFunc((*WorkersClass)(nil), (*v1beta2.WorkersClass)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha4_WorkersClass_To_v1beta2_WorkersClass(a.(*WorkersClass), b.(*v1beta2.WorkersClass), scope) }); err != nil { @@ -1424,6 +1434,7 @@ func autoConvert_v1alpha4_MachineHealthCheckSpec_To_v1beta2_MachineHealthCheckSp out.ClusterName = in.ClusterName out.Selector = in.Selector // WARNING: in.UnhealthyConditions requires manual conversion: does not exist in peer-type + out.UnhealthyMachineConditions = *(*[]v1beta2.UnhealthyMachineCondition)(unsafe.Pointer(&in.UnhealthyMachineConditions)) out.MaxUnhealthy = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnhealthy)) out.UnhealthyRange = (*string)(unsafe.Pointer(in.UnhealthyRange)) out.NodeStartupTimeout = (*v1.Duration)(unsafe.Pointer(in.NodeStartupTimeout)) @@ -1435,6 +1446,7 @@ func autoConvert_v1beta2_MachineHealthCheckSpec_To_v1alpha4_MachineHealthCheckSp out.ClusterName = in.ClusterName out.Selector = in.Selector // WARNING: in.UnhealthyNodeConditions requires manual conversion: does not exist in peer-type + out.UnhealthyMachineConditions = *(*[]UnhealthyMachineCondition)(unsafe.Pointer(&in.UnhealthyMachineConditions)) out.MaxUnhealthy = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnhealthy)) out.UnhealthyRange = (*string)(unsafe.Pointer(in.UnhealthyRange)) out.NodeStartupTimeout = (*v1.Duration)(unsafe.Pointer(in.NodeStartupTimeout)) @@ -2069,6 +2081,30 @@ func autoConvert_v1beta2_Topology_To_v1alpha4_Topology(in *v1beta2.Topology, out return nil } +func autoConvert_v1alpha4_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in *UnhealthyMachineCondition, out *v1beta2.UnhealthyMachineCondition, s conversion.Scope) error { + out.Type = in.Type + out.Status = v1.ConditionStatus(in.Status) + out.Timeout = in.Timeout + return nil +} + +// Convert_v1alpha4_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition is an 
autogenerated conversion function. +func Convert_v1alpha4_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in *UnhealthyMachineCondition, out *v1beta2.UnhealthyMachineCondition, s conversion.Scope) error { + return autoConvert_v1alpha4_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in, out, s) +} + +func autoConvert_v1beta2_UnhealthyMachineCondition_To_v1alpha4_UnhealthyMachineCondition(in *v1beta2.UnhealthyMachineCondition, out *UnhealthyMachineCondition, s conversion.Scope) error { + out.Type = in.Type + out.Status = v1.ConditionStatus(in.Status) + out.Timeout = in.Timeout + return nil +} + +// Convert_v1beta2_UnhealthyMachineCondition_To_v1alpha4_UnhealthyMachineCondition is an autogenerated conversion function. +func Convert_v1beta2_UnhealthyMachineCondition_To_v1alpha4_UnhealthyMachineCondition(in *v1beta2.UnhealthyMachineCondition, out *UnhealthyMachineCondition, s conversion.Scope) error { + return autoConvert_v1beta2_UnhealthyMachineCondition_To_v1alpha4_UnhealthyMachineCondition(in, out, s) +} + func autoConvert_v1alpha4_WorkersClass_To_v1beta2_WorkersClass(in *WorkersClass, out *v1beta2.WorkersClass, s conversion.Scope) error { if in.MachineDeployments != nil { in, out := &in.MachineDeployments, &out.MachineDeployments diff --git a/internal/apis/core/v1alpha4/zz_generated.deepcopy.go b/internal/apis/core/v1alpha4/zz_generated.deepcopy.go index 6d3e29cb0b60..f3894ed965c9 100644 --- a/internal/apis/core/v1alpha4/zz_generated.deepcopy.go +++ b/internal/apis/core/v1alpha4/zz_generated.deepcopy.go @@ -778,6 +778,11 @@ func (in *MachineHealthCheckSpec) DeepCopyInto(out *MachineHealthCheckSpec) { *out = make([]UnhealthyCondition, len(*in)) copy(*out, *in) } + if in.UnhealthyMachineConditions != nil { + in, out := &in.UnhealthyMachineConditions, &out.UnhealthyMachineConditions + *out = make([]UnhealthyMachineCondition, len(*in)) + copy(*out, *in) + } if in.MaxUnhealthy != nil { in, out := &in.MaxUnhealthy, &out.MaxUnhealthy *out = 
new(intstr.IntOrString) @@ -1344,6 +1349,22 @@ func (in *UnhealthyCondition) DeepCopy() *UnhealthyCondition { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *UnhealthyMachineCondition) DeepCopyInto(out *UnhealthyMachineCondition) { + *out = *in + out.Timeout = in.Timeout +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UnhealthyMachineCondition. +func (in *UnhealthyMachineCondition) DeepCopy() *UnhealthyMachineCondition { + if in == nil { + return nil + } + out := new(UnhealthyMachineCondition) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *WorkersClass) DeepCopyInto(out *WorkersClass) { *out = *in diff --git a/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go b/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go index 3ad6d6790d8d..e7d1c4465c95 100644 --- a/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go +++ b/internal/controllers/machinehealthcheck/machinehealthcheck_targets.go @@ -190,7 +190,7 @@ func (t *healthCheckTarget) needsRemediation(logger logr.Logger, timeoutForMachi continue } - // If the condition has been in the unhealthy state for longer than the + // If the node condition has been in the unhealthy state for longer than the // timeout, return true with no requeue time. 
if nodeCondition.LastTransitionTime.Add(c.Timeout.Duration).Before(now) { v1beta1conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSucceededV1Beta1Condition, clusterv1.UnhealthyNodeConditionV1Beta1Reason, clusterv1.ConditionSeverityWarning, "Condition %s on node is reporting status %s for more than %s", c.Type, c.Status, c.Timeout.Duration.String()) @@ -211,6 +211,38 @@ func (t *healthCheckTarget) needsRemediation(logger logr.Logger, timeoutForMachi nextCheckTimes = append(nextCheckTimes, nextCheck) } } + + for _, c := range t.MHC.Spec.UnhealthyMachineConditions { + machineCondition := getMachineCondition(t.Machine, c.Type) + + // Skip when current machine condition is different from the one reported + // in the MachineHealthCheck. + if machineCondition == nil || machineCondition.Status != c.Status { + continue + } + + // If the machine condition has been in the unhealthy state for longer than the + // timeout, return true with no requeue time. + if machineCondition.LastTransitionTime.Add(c.Timeout.Duration).Before(now) { + v1beta1conditions.MarkFalse(t.Machine, clusterv1.MachineHealthCheckSucceededV1Beta1Condition, clusterv1.UnhealthyMachineConditionV1Beta1Reason, clusterv1.ConditionSeverityWarning, "Condition %s on the machine is reporting status %s for more than %s", c.Type, c.Status, c.Timeout.Duration.String()) + logger.V(3).Info("Target is unhealthy: condition is in state longer than allowed timeout", "condition", c.Type, "state", c.Status, "timeout", c.Timeout.Duration.String()) + + conditions.Set(t.Machine, metav1.Condition{ + Type: clusterv1.MachineHealthCheckSucceededCondition, + Status: metav1.ConditionFalse, + Reason: clusterv1.MachineHealthCheckUnhealthyMachineReason, + Message: fmt.Sprintf("Health check failed: Condition %s on Machine is reporting status %s for more than %s", c.Type, c.Status, c.Timeout.Duration.String()), + }) + return true, time.Duration(0) + } + + durationUnhealthy := now.Sub(machineCondition.LastTransitionTime.Time) + 
nextCheck := c.Timeout.Duration - durationUnhealthy + time.Second + if nextCheck > 0 { + nextCheckTimes = append(nextCheckTimes, nextCheck) + } + } + return false, minDuration(nextCheckTimes) } @@ -357,6 +389,16 @@ func getNodeCondition(node *corev1.Node, conditionType corev1.NodeConditionType) return nil } +// getMachineCondition returns machine condition by type. +func getMachineCondition(node *clusterv1.Machine, conditionType string) *metav1.Condition { + for _, cond := range node.Status.Conditions { + if cond.Type == conditionType { + return &cond + } + } + return nil +} + func minDuration(durations []time.Duration) time.Duration { if len(durations) == 0 { return time.Duration(0) diff --git a/internal/controllers/machinehealthcheck/machinehealthcheck_targets_test.go b/internal/controllers/machinehealthcheck/machinehealthcheck_targets_test.go index f6c6939fdfe0..aabcc02e0e62 100644 --- a/internal/controllers/machinehealthcheck/machinehealthcheck_targets_test.go +++ b/internal/controllers/machinehealthcheck/machinehealthcheck_targets_test.go @@ -29,6 +29,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + controlplanev1 "sigs.k8s.io/cluster-api/api/controlplane/kubeadm/v1beta2" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" "sigs.k8s.io/cluster-api/util/conditions" v1beta1conditions "sigs.k8s.io/cluster-api/util/conditions/deprecated/v1beta1" @@ -212,6 +213,7 @@ func TestHealthCheckTargets(t *testing.T) { timeoutForMachineToHaveNode := 10 * time.Minute disabledTimeoutForMachineToHaveNode := time.Duration(0) timeoutForUnhealthyNodeConditions := 5 * time.Minute + timeoutForUnhealthyMachineConditions := 5 * time.Minute // Create a test MHC testMHC := &clusterv1.MachineHealthCheck{ @@ -236,6 +238,13 @@ func TestHealthCheckTargets(t *testing.T) { Timeout: metav1.Duration{Duration: timeoutForUnhealthyNodeConditions}, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: 
controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: timeoutForUnhealthyMachineConditions}, + }, + }, }, } @@ -392,6 +401,18 @@ func TestHealthCheckTargets(t *testing.T) { machineAnnotationRemediationCondition := newFailedHealthCheckV1Beta1Condition(clusterv1.HasRemediateMachineAnnotationV1Beta1Reason, annotationRemediationMsg) machineAnnotationRemediationV1Beta2Condition := newFailedHealthCheckCondition(clusterv1.MachineHealthCheckHasRemediateAnnotationReason, annotationRemediationV1Beta2Msg) + // when the machine has been in an unhealthy state for longer than the timeout + testMachineUnhealthy400 := newTestUnhealthyMachine("machine1", namespace, clusterName, "node1", mhcSelector, controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, metav1.ConditionFalse, 400*time.Second) + machineUnhealthy400 := healthCheckTarget{ + Cluster: cluster, + MHC: testMHC, + Node: testNodeHealthy, + Machine: testMachineUnhealthy400, + nodeMissing: false, + } + machineUnhealthy400Condition := newFailedHealthCheckV1Beta1Condition(clusterv1.UnhealthyMachineConditionV1Beta1Reason, "Condition EtcdPodHealthy on the machine is reporting status False for more than %s", timeoutForUnhealthyNodeConditions) + machineUnhealthy400V1Beta2Condition := newFailedHealthCheckCondition(clusterv1.MachineHealthCheckUnhealthyMachineReason, "Health check failed: Condition EtcdPodHealthy on Machine is reporting status False for more than %s", timeoutForUnhealthyNodeConditions) + testCases := []struct { desc string targets []healthCheckTarget @@ -500,6 +521,15 @@ func TestHealthCheckTargets(t *testing.T) { expectedNeedsRemediationCondition: []clusterv1.Condition{}, expectedNextCheckTimes: []time.Duration{}, }, + { + desc: "when the machine has been in an unhealthy state for longer than the timeout", + targets: []healthCheckTarget{machineUnhealthy400}, + expectedHealthy: []healthCheckTarget{}, + 
expectedNeedsRemediation: []healthCheckTarget{machineUnhealthy400}, + expectedNeedsRemediationCondition: []clusterv1.Condition{machineUnhealthy400Condition}, + expectedNeedsRemediationV1Beta2Condition: []metav1.Condition{machineUnhealthy400V1Beta2Condition}, + expectedNextCheckTimes: []time.Duration{}, + }, } for _, tc := range testCases { @@ -635,6 +665,51 @@ func newTestUnhealthyNode(name string, condition corev1.NodeConditionType, statu } } +func newTestUnhealthyMachine(name, namespace, clusterName, nodeName string, labels map[string]string, condition string, status metav1.ConditionStatus, unhealthyDuration time.Duration) *clusterv1.Machine { + // Copy the labels so that the map is unique to each test Machine + l := make(map[string]string) + for k, v := range labels { + l[k] = v + } + l[clusterv1.ClusterNameLabel] = clusterName + + bootstrap := "bootstrap" + return &clusterv1.Machine{ + TypeMeta: metav1.TypeMeta{ + APIVersion: clusterv1.GroupVersion.String(), + Kind: "Machine", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Labels: l, + }, + Spec: clusterv1.MachineSpec{ + ClusterName: clusterName, + Bootstrap: clusterv1.Bootstrap{ + DataSecretName: &bootstrap, + }, + }, + Status: clusterv1.MachineStatus{ + Conditions: []metav1.Condition{ + { + Type: condition, + Status: status, + LastTransitionTime: metav1.NewTime(time.Now().Add(-unhealthyDuration)), + }, + }, + Initialization: &clusterv1.MachineInitializationStatus{ + InfrastructureProvisioned: true, + BootstrapDataSecretCreated: true, + }, + Phase: string(clusterv1.MachinePhaseRunning), + NodeRef: &corev1.ObjectReference{ + Name: nodeName, + }, + }, + } +} + func newFailedHealthCheckV1Beta1Condition(reason string, messageFormat string, messageArgs ...interface{}) clusterv1.Condition { return *v1beta1conditions.FalseCondition(clusterv1.MachineHealthCheckSucceededV1Beta1Condition, reason, clusterv1.ConditionSeverityWarning, messageFormat, messageArgs...) 
} From 097384d371af914f800425b26fb66dcfa1988863 Mon Sep 17 00:00:00 2001 From: Justin Miron Date: Sun, 25 May 2025 12:16:54 -0500 Subject: [PATCH 2/4] remove from old apis, ensure unhealthyMachineConditions is used/tested everywhere --- api/core/v1beta1/conversion_test.go | 43 +++++++ api/core/v1beta1/machinehealthcheck_types.go | 36 ------ api/core/v1beta1/zz_generated.conversion.go | 38 +----- api/core/v1beta1/zz_generated.deepcopy.go | 21 --- api/core/v1beta1/zz_generated.openapi.go | 55 +------- api/core/v1beta2/clusterclass_types.go | 8 ++ api/core/v1beta2/zz_generated.deepcopy.go | 5 + api/core/v1beta2/zz_generated.openapi.go | 32 ++++- .../cluster.x-k8s.io_clusterclasses.yaml | 68 ++++++++++ .../crd/bases/cluster.x-k8s.io_clusters.yaml | 68 ++++++++++ .../cluster.x-k8s.io_machinehealthchecks.yaml | 99 -------------- exp/topology/desiredstate/desired_state.go | 15 ++- .../desiredstate/desired_state_test.go | 38 +++++- .../apis/core/v1alpha3/conversion_test.go | 11 ++ .../core/v1alpha3/machinehealthcheck_types.go | 36 ------ .../core/v1alpha3/zz_generated.conversion.go | 37 +----- .../core/v1alpha3/zz_generated.deepcopy.go | 21 --- .../apis/core/v1alpha4/conversion_test.go | 11 ++ .../core/v1alpha4/machinehealthcheck_types.go | 36 ------ .../core/v1alpha4/zz_generated.conversion.go | 37 +----- .../core/v1alpha4/zz_generated.deepcopy.go | 21 --- .../machinehealthcheck_controller_test.go | 120 ++++++++++++++++- .../topology/cluster/reconcile_state_test.go | 35 +++++ internal/webhooks/clusterclass.go | 11 +- internal/webhooks/clusterclass_test.go | 47 ++++++- internal/webhooks/machinehealthcheck_test.go | 121 ++++++++++++++++++ util/test/builder/builders.go | 30 +++-- util/test/builder/zz_generated.deepcopy.go | 5 + 28 files changed, 641 insertions(+), 464 deletions(-) diff --git a/api/core/v1beta1/conversion_test.go b/api/core/v1beta1/conversion_test.go index a52562ef4e04..bd79d3bd2478 100644 --- a/api/core/v1beta1/conversion_test.go +++ 
b/api/core/v1beta1/conversion_test.go @@ -75,6 +75,7 @@ func TestFuzzyConversion(t *testing.T) { func ClusterFuzzFuncs(_ runtimeserializer.CodecFactory) []interface{} { return []interface{}{ + hubClusterSpec, hubClusterStatus, spokeClusterTopology, spokeClusterStatus, @@ -99,6 +100,21 @@ func hubClusterStatus(in *clusterv1.ClusterStatus, c randfill.Continue) { } } +func hubClusterSpec(in *clusterv1.ClusterSpec, c randfill.Continue) { + c.FillNoCustom(in) + + // remove MachineHealthCheck.UnhealthyMachineConditions as it does not exist in v1beta1. + if in.Topology != nil && in.Topology.ControlPlane.MachineHealthCheck != nil { + in.Topology.ControlPlane.MachineHealthCheck.UnhealthyMachineConditions = nil + } + + if in.Topology != nil && in.Topology.Workers != nil && len(in.Topology.Workers.MachineDeployments) > 0 { + for i := range in.Topology.Workers.MachineDeployments { + in.Topology.Workers.MachineDeployments[i].MachineHealthCheck = nil + } + } +} + func spokeClusterTopology(in *Topology, c randfill.Continue) { c.FillNoCustom(in) @@ -129,6 +145,22 @@ func ClusterClassFuncs(_ runtimeserializer.CodecFactory) []interface{} { hubJSONSchemaProps, spokeClusterClassStatus, spokeJSONSchemaProps, + hubClusterClassSpec, + } +} + +func hubClusterClassSpec(in *clusterv1.ClusterClassSpec, c randfill.Continue) { + c.FillNoCustom(in) + + // remove MachineHealthCheck.UnhealthyMachineConditions as it does not exist in v1beta1. 
+ if in.ControlPlane.MachineHealthCheck != nil && in.ControlPlane.MachineHealthCheck.UnhealthyMachineConditions != nil { + in.ControlPlane.MachineHealthCheck.UnhealthyMachineConditions = nil + } + + if len(in.Workers.MachineDeployments) > 0 { + for i := range in.Workers.MachineDeployments { + in.Workers.MachineDeployments[i].MachineHealthCheck = nil + } } } @@ -348,13 +380,24 @@ func spokeMachineDeploymentStatus(in *MachineDeploymentStatus, c randfill.Contin func MachineHealthCheckFuzzFuncs(_ runtimeserializer.CodecFactory) []interface{} { return []interface{}{ + hubMachineHealthCheckSpec, hubMachineHealthCheckStatus, spokeMachineHealthCheckStatus, } } +func hubMachineHealthCheckSpec(in *clusterv1.MachineHealthCheckSpec, c randfill.Continue) { + c.FillNoCustom(in) + + // Drop UnhealthyMachineConditions as it does not exist in v1beta1. + if in.UnhealthyMachineConditions != nil { + in.UnhealthyMachineConditions = nil + } +} + func hubMachineHealthCheckStatus(in *clusterv1.MachineHealthCheckStatus, c randfill.Continue) { c.FillNoCustom(in) + // Drop empty structs with only omit empty fields. if in.Deprecated != nil { if in.Deprecated.V1Beta1 == nil || reflect.DeepEqual(in.Deprecated.V1Beta1, &clusterv1.MachineHealthCheckV1Beta1DeprecatedStatus{}) { diff --git a/api/core/v1beta1/machinehealthcheck_types.go b/api/core/v1beta1/machinehealthcheck_types.go index bcf6f9f694eb..6ca33d4e2663 100644 --- a/api/core/v1beta1/machinehealthcheck_types.go +++ b/api/core/v1beta1/machinehealthcheck_types.go @@ -69,14 +69,6 @@ type MachineHealthCheckSpec struct { // +kubebuilder:validation:MaxItems=100 UnhealthyConditions []UnhealthyCondition `json:"unhealthyConditions,omitempty"` - // unhealthyMachineConditions contains a list of the machine conditions that determine - // whether a node is considered unhealthy. The conditions are combined in a - // logical OR, i.e. if any of the conditions is met, the node is unhealthy. 
- // - // +optional - // +kubebuilder:validation:MaxItems=100 - UnhealthyMachineConditions []UnhealthyMachineCondition `json:"unhealthyMachineConditions,omitempty"` - // maxUnhealthy specifies the maximum number of unhealthy machines allowed. // Any further remediation is only allowed if at most "maxUnhealthy" machines selected by // "selector" are not healthy. @@ -156,34 +148,6 @@ type UnhealthyCondition struct { // ANCHOR_END: UnhealthyCondition -// ANCHOR: UnhealthyMachineCondition - -// UnhealthyMachineCondition represents a Node condition type and value with a timeout -// specified as a duration. When the named condition has been in the given -// status for at least the timeout value, a node is considered unhealthy. -type UnhealthyMachineCondition struct { - // type of Node condition - // +kubebuilder:validation:Type=string - // +kubebuilder:validation:MinLength=1 - // +required - Type string `json:"type"` - - // status of the condition, one of True, False, Unknown. - // +kubebuilder:validation:Type=string - // +kubebuilder:validation:MinLength=1 - // +required - Status metav1.ConditionStatus `json:"status"` - - // timeout is the duration that a node must be in a given status for, - // after which the node is considered unhealthy. - // For example, with a value of "1h", the node must match the status - // for at least 1 hour before being considered unhealthy. - // +required - Timeout metav1.Duration `json:"timeout"` -} - -// ANCHOR_END: UnhealthyMachineCondition - // ANCHOR: MachineHealthCheckStatus // MachineHealthCheckStatus defines the observed state of MachineHealthCheck. 
diff --git a/api/core/v1beta1/zz_generated.conversion.go b/api/core/v1beta1/zz_generated.conversion.go index 25d49308755a..9e658b248093 100644 --- a/api/core/v1beta1/zz_generated.conversion.go +++ b/api/core/v1beta1/zz_generated.conversion.go @@ -725,16 +725,6 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } - if err := s.AddGeneratedConversionFunc((*UnhealthyMachineCondition)(nil), (*v1beta2.UnhealthyMachineCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1beta1_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(a.(*UnhealthyMachineCondition), b.(*v1beta2.UnhealthyMachineCondition), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*v1beta2.UnhealthyMachineCondition)(nil), (*UnhealthyMachineCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1beta2_UnhealthyMachineCondition_To_v1beta1_UnhealthyMachineCondition(a.(*v1beta2.UnhealthyMachineCondition), b.(*UnhealthyMachineCondition), scope) - }); err != nil { - return err - } if err := s.AddGeneratedConversionFunc((*ValidationRule)(nil), (*v1beta2.ValidationRule)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1beta1_ValidationRule_To_v1beta2_ValidationRule(a.(*ValidationRule), b.(*v1beta2.ValidationRule), scope) }); err != nil { @@ -2704,6 +2694,7 @@ func autoConvert_v1beta1_MachineHealthCheckClass_To_v1beta2_MachineHealthCheckCl func autoConvert_v1beta2_MachineHealthCheckClass_To_v1beta1_MachineHealthCheckClass(in *v1beta2.MachineHealthCheckClass, out *MachineHealthCheckClass, s conversion.Scope) error { // WARNING: in.UnhealthyNodeConditions requires manual conversion: does not exist in peer-type + // WARNING: in.UnhealthyMachineConditions requires manual conversion: does not exist in peer-type out.MaxUnhealthy = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnhealthy)) out.UnhealthyRange = 
(*string)(unsafe.Pointer(in.UnhealthyRange)) out.NodeStartupTimeout = (*v1.Duration)(unsafe.Pointer(in.NodeStartupTimeout)) @@ -2757,7 +2748,6 @@ func autoConvert_v1beta1_MachineHealthCheckSpec_To_v1beta2_MachineHealthCheckSpe out.ClusterName = in.ClusterName out.Selector = in.Selector // WARNING: in.UnhealthyConditions requires manual conversion: does not exist in peer-type - out.UnhealthyMachineConditions = *(*[]v1beta2.UnhealthyMachineCondition)(unsafe.Pointer(&in.UnhealthyMachineConditions)) out.MaxUnhealthy = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnhealthy)) out.UnhealthyRange = (*string)(unsafe.Pointer(in.UnhealthyRange)) out.NodeStartupTimeout = (*v1.Duration)(unsafe.Pointer(in.NodeStartupTimeout)) @@ -2769,7 +2759,7 @@ func autoConvert_v1beta2_MachineHealthCheckSpec_To_v1beta1_MachineHealthCheckSpe out.ClusterName = in.ClusterName out.Selector = in.Selector // WARNING: in.UnhealthyNodeConditions requires manual conversion: does not exist in peer-type - out.UnhealthyMachineConditions = *(*[]UnhealthyMachineCondition)(unsafe.Pointer(&in.UnhealthyMachineConditions)) + // WARNING: in.UnhealthyMachineConditions requires manual conversion: does not exist in peer-type out.MaxUnhealthy = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnhealthy)) out.UnhealthyRange = (*string)(unsafe.Pointer(in.UnhealthyRange)) out.NodeStartupTimeout = (*v1.Duration)(unsafe.Pointer(in.NodeStartupTimeout)) @@ -3835,30 +3825,6 @@ func autoConvert_v1beta2_Topology_To_v1beta1_Topology(in *v1beta2.Topology, out return nil } -func autoConvert_v1beta1_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in *UnhealthyMachineCondition, out *v1beta2.UnhealthyMachineCondition, s conversion.Scope) error { - out.Type = in.Type - out.Status = v1.ConditionStatus(in.Status) - out.Timeout = in.Timeout - return nil -} - -// Convert_v1beta1_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition is an autogenerated conversion function. 
-func Convert_v1beta1_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in *UnhealthyMachineCondition, out *v1beta2.UnhealthyMachineCondition, s conversion.Scope) error { - return autoConvert_v1beta1_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in, out, s) -} - -func autoConvert_v1beta2_UnhealthyMachineCondition_To_v1beta1_UnhealthyMachineCondition(in *v1beta2.UnhealthyMachineCondition, out *UnhealthyMachineCondition, s conversion.Scope) error { - out.Type = in.Type - out.Status = v1.ConditionStatus(in.Status) - out.Timeout = in.Timeout - return nil -} - -// Convert_v1beta2_UnhealthyMachineCondition_To_v1beta1_UnhealthyMachineCondition is an autogenerated conversion function. -func Convert_v1beta2_UnhealthyMachineCondition_To_v1beta1_UnhealthyMachineCondition(in *v1beta2.UnhealthyMachineCondition, out *UnhealthyMachineCondition, s conversion.Scope) error { - return autoConvert_v1beta2_UnhealthyMachineCondition_To_v1beta1_UnhealthyMachineCondition(in, out, s) -} - func autoConvert_v1beta1_ValidationRule_To_v1beta2_ValidationRule(in *ValidationRule, out *v1beta2.ValidationRule, s conversion.Scope) error { out.Rule = in.Rule out.Message = in.Message diff --git a/api/core/v1beta1/zz_generated.deepcopy.go b/api/core/v1beta1/zz_generated.deepcopy.go index f0cc5caea8c4..a1b090669cb4 100644 --- a/api/core/v1beta1/zz_generated.deepcopy.go +++ b/api/core/v1beta1/zz_generated.deepcopy.go @@ -1838,11 +1838,6 @@ func (in *MachineHealthCheckSpec) DeepCopyInto(out *MachineHealthCheckSpec) { *out = make([]UnhealthyCondition, len(*in)) copy(*out, *in) } - if in.UnhealthyMachineConditions != nil { - in, out := &in.UnhealthyMachineConditions, &out.UnhealthyMachineConditions - *out = make([]UnhealthyMachineCondition, len(*in)) - copy(*out, *in) - } if in.MaxUnhealthy != nil { in, out := &in.MaxUnhealthy, &out.MaxUnhealthy *out = new(intstr.IntOrString) @@ -2910,22 +2905,6 @@ func (in *UnhealthyCondition) DeepCopy() *UnhealthyCondition { return out } -// 
DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *UnhealthyMachineCondition) DeepCopyInto(out *UnhealthyMachineCondition) { - *out = *in - out.Timeout = in.Timeout -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UnhealthyMachineCondition. -func (in *UnhealthyMachineCondition) DeepCopy() *UnhealthyMachineCondition { - if in == nil { - return nil - } - out := new(UnhealthyMachineCondition) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ValidationRule) DeepCopyInto(out *ValidationRule) { *out = *in diff --git a/api/core/v1beta1/zz_generated.openapi.go b/api/core/v1beta1/zz_generated.openapi.go index 5ed4ae360e7f..13a78b23719c 100644 --- a/api/core/v1beta1/zz_generated.openapi.go +++ b/api/core/v1beta1/zz_generated.openapi.go @@ -121,7 +121,6 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA "sigs.k8s.io/cluster-api/api/core/v1beta1.RemediationStrategy": schema_cluster_api_api_core_v1beta1_RemediationStrategy(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.Topology": schema_cluster_api_api_core_v1beta1_Topology(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.UnhealthyCondition": schema_cluster_api_api_core_v1beta1_UnhealthyCondition(ref), - "sigs.k8s.io/cluster-api/api/core/v1beta1.UnhealthyMachineCondition": schema_cluster_api_api_core_v1beta1_UnhealthyMachineCondition(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.ValidationRule": schema_cluster_api_api_core_v1beta1_ValidationRule(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.VariableSchema": schema_cluster_api_api_core_v1beta1_VariableSchema(ref), "sigs.k8s.io/cluster-api/api/core/v1beta1.VariableSchemaMetadata": schema_cluster_api_api_core_v1beta1_VariableSchemaMetadata(ref), @@ -3182,20 +3181,6 @@ func 
schema_cluster_api_api_core_v1beta1_MachineHealthCheckSpec(ref common.Refer }, }, }, - "unhealthyMachineConditions": { - SchemaProps: spec.SchemaProps{ - Description: "unhealthyMachineConditions contains a list of the machine conditions that determine whether a node is considered unhealthy. The conditions are combined in a logical OR, i.e. if any of the conditions is met, the node is unhealthy.", - Type: []string{"array"}, - Items: &spec.SchemaOrArray{ - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref("sigs.k8s.io/cluster-api/api/core/v1beta1.UnhealthyMachineCondition"), - }, - }, - }, - }, - }, "maxUnhealthy": { SchemaProps: spec.SchemaProps{ Description: "maxUnhealthy specifies the maximum number of unhealthy machines allowed. Any further remediation is only allowed if at most \"maxUnhealthy\" machines selected by \"selector\" are not healthy.\n\nDeprecated: This field is deprecated and is going to be removed in the next apiVersion. 
Please see https://github.com/kubernetes-sigs/cluster-api/issues/10722 for more details.", @@ -3226,7 +3211,7 @@ func schema_cluster_api_api_core_v1beta1_MachineHealthCheckSpec(ref common.Refer }, }, Dependencies: []string{ - "k8s.io/api/core/v1.ObjectReference", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector", "k8s.io/apimachinery/pkg/util/intstr.IntOrString", "sigs.k8s.io/cluster-api/api/core/v1beta1.UnhealthyCondition", "sigs.k8s.io/cluster-api/api/core/v1beta1.UnhealthyMachineCondition"}, + "k8s.io/api/core/v1.ObjectReference", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/apimachinery/pkg/apis/meta/v1.LabelSelector", "k8s.io/apimachinery/pkg/util/intstr.IntOrString", "sigs.k8s.io/cluster-api/api/core/v1beta1.UnhealthyCondition"}, } } @@ -5155,44 +5140,6 @@ func schema_cluster_api_api_core_v1beta1_UnhealthyCondition(ref common.Reference } } -func schema_cluster_api_api_core_v1beta1_UnhealthyMachineCondition(ref common.ReferenceCallback) common.OpenAPIDefinition { - return common.OpenAPIDefinition{ - Schema: spec.Schema{ - SchemaProps: spec.SchemaProps{ - Description: "UnhealthyMachineCondition represents a Node condition type and value with a timeout specified as a duration. When the named condition has been in the given status for at least the timeout value, a node is considered unhealthy.", - Type: []string{"object"}, - Properties: map[string]spec.Schema{ - "type": { - SchemaProps: spec.SchemaProps{ - Description: "type of Node condition", - Default: "", - Type: []string{"string"}, - Format: "", - }, - }, - "status": { - SchemaProps: spec.SchemaProps{ - Description: "status of the condition, one of True, False, Unknown.", - Default: "", - Type: []string{"string"}, - Format: "", - }, - }, - "timeout": { - SchemaProps: spec.SchemaProps{ - Description: "timeout is the duration that a node must be in a given status for, after which the node is considered unhealthy. 
For example, with a value of \"1h\", the node must match the status for at least 1 hour before being considered unhealthy.", - Ref: ref("k8s.io/apimachinery/pkg/apis/meta/v1.Duration"), - }, - }, - }, - Required: []string{"type", "status", "timeout"}, - }, - }, - Dependencies: []string{ - "k8s.io/apimachinery/pkg/apis/meta/v1.Duration"}, - } -} - func schema_cluster_api_api_core_v1beta1_ValidationRule(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ diff --git a/api/core/v1beta2/clusterclass_types.go b/api/core/v1beta2/clusterclass_types.go index e35cf8b25cdb..3389c837832d 100644 --- a/api/core/v1beta2/clusterclass_types.go +++ b/api/core/v1beta2/clusterclass_types.go @@ -385,6 +385,14 @@ type MachineHealthCheckClass struct { // +kubebuilder:validation:MaxItems=100 UnhealthyNodeConditions []UnhealthyNodeCondition `json:"unhealthyNodeConditions,omitempty"` + // unhealthyMachineConditions contains a list of the machine conditions that determine + // whether a node is considered unhealthy. The conditions are combined in a + // logical OR, i.e. if any of the conditions is met, the node is unhealthy. + // + // +optional + // +kubebuilder:validation:MaxItems=100 + UnhealthyMachineConditions []UnhealthyMachineCondition `json:"unhealthyMachineConditions,omitempty"` + // maxUnhealthy specifies the maximum number of unhealthy machines allowed. // Any further remediation is only allowed if at most "maxUnhealthy" machines selected by // "selector" are not healthy. 
diff --git a/api/core/v1beta2/zz_generated.deepcopy.go b/api/core/v1beta2/zz_generated.deepcopy.go index 0772d1d8a490..cd18d98c5e1a 100644 --- a/api/core/v1beta2/zz_generated.deepcopy.go +++ b/api/core/v1beta2/zz_generated.deepcopy.go @@ -1891,6 +1891,11 @@ func (in *MachineHealthCheckClass) DeepCopyInto(out *MachineHealthCheckClass) { *out = make([]UnhealthyNodeCondition, len(*in)) copy(*out, *in) } + if in.UnhealthyMachineConditions != nil { + in, out := &in.UnhealthyMachineConditions, &out.UnhealthyMachineConditions + *out = make([]UnhealthyMachineCondition, len(*in)) + copy(*out, *in) + } if in.MaxUnhealthy != nil { in, out := &in.MaxUnhealthy, &out.MaxUnhealthy *out = new(intstr.IntOrString) diff --git a/api/core/v1beta2/zz_generated.openapi.go b/api/core/v1beta2/zz_generated.openapi.go index f1c11bb5f4e3..100b41efdcad 100644 --- a/api/core/v1beta2/zz_generated.openapi.go +++ b/api/core/v1beta2/zz_generated.openapi.go @@ -3206,6 +3206,20 @@ func schema_cluster_api_api_core_v1beta2_MachineHealthCheckClass(ref common.Refe }, }, }, + "unhealthyMachineConditions": { + SchemaProps: spec.SchemaProps{ + Description: "unhealthyMachineConditions contains a list of the machine conditions that determine whether a node is considered unhealthy. The conditions are combined in a logical OR, i.e. if any of the conditions is met, the node is unhealthy.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("sigs.k8s.io/cluster-api/api/core/v1beta2.UnhealthyMachineCondition"), + }, + }, + }, + }, + }, "maxUnhealthy": { SchemaProps: spec.SchemaProps{ Description: "maxUnhealthy specifies the maximum number of unhealthy machines allowed. 
Any further remediation is only allowed if at most \"maxUnhealthy\" machines selected by \"selector\" are not healthy.", @@ -3235,7 +3249,7 @@ func schema_cluster_api_api_core_v1beta2_MachineHealthCheckClass(ref common.Refe }, }, Dependencies: []string{ - "k8s.io/api/core/v1.ObjectReference", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/apimachinery/pkg/util/intstr.IntOrString", "sigs.k8s.io/cluster-api/api/core/v1beta2.UnhealthyNodeCondition"}, + "k8s.io/api/core/v1.ObjectReference", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/apimachinery/pkg/util/intstr.IntOrString", "sigs.k8s.io/cluster-api/api/core/v1beta2.UnhealthyMachineCondition", "sigs.k8s.io/cluster-api/api/core/v1beta2.UnhealthyNodeCondition"}, } } @@ -3512,6 +3526,20 @@ func schema_cluster_api_api_core_v1beta2_MachineHealthCheckTopology(ref common.R }, }, }, + "unhealthyMachineConditions": { + SchemaProps: spec.SchemaProps{ + Description: "unhealthyMachineConditions contains a list of the machine conditions that determine whether a node is considered unhealthy. The conditions are combined in a logical OR, i.e. if any of the conditions is met, the node is unhealthy.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref("sigs.k8s.io/cluster-api/api/core/v1beta2.UnhealthyMachineCondition"), + }, + }, + }, + }, + }, "maxUnhealthy": { SchemaProps: spec.SchemaProps{ Description: "maxUnhealthy specifies the maximum number of unhealthy machines allowed. 
Any further remediation is only allowed if at most \"maxUnhealthy\" machines selected by \"selector\" are not healthy.", @@ -3541,7 +3569,7 @@ func schema_cluster_api_api_core_v1beta2_MachineHealthCheckTopology(ref common.R }, }, Dependencies: []string{ - "k8s.io/api/core/v1.ObjectReference", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/apimachinery/pkg/util/intstr.IntOrString", "sigs.k8s.io/cluster-api/api/core/v1beta2.UnhealthyNodeCondition"}, + "k8s.io/api/core/v1.ObjectReference", "k8s.io/apimachinery/pkg/apis/meta/v1.Duration", "k8s.io/apimachinery/pkg/util/intstr.IntOrString", "sigs.k8s.io/cluster-api/api/core/v1beta2.UnhealthyMachineCondition", "sigs.k8s.io/cluster-api/api/core/v1beta2.UnhealthyNodeCondition"}, } } diff --git a/config/crd/bases/cluster.x-k8s.io_clusterclasses.yaml b/config/crd/bases/cluster.x-k8s.io_clusterclasses.yaml index b1e61da94ffb..c98edb2fa8b9 100644 --- a/config/crd/bases/cluster.x-k8s.io_clusterclasses.yaml +++ b/config/crd/bases/cluster.x-k8s.io_clusterclasses.yaml @@ -2970,6 +2970,40 @@ spec: type: string type: object x-kubernetes-map-type: atomic + unhealthyMachineConditions: + description: |- + unhealthyMachineConditions contains a list of the machine conditions that determine + whether a node is considered unhealthy. The conditions are combined in a + logical OR, i.e. if any of the conditions is met, the node is unhealthy. + items: + description: |- + UnhealthyMachineCondition represents a Node condition type and value with a timeout + specified as a duration. When the named condition has been in the given + status for at least the timeout value, a node is considered unhealthy. + properties: + status: + description: status of the condition, one of True, False, + Unknown. + minLength: 1 + type: string + timeout: + description: |- + timeout is the duration that a node must be in a given status for, + after which the node is considered unhealthy. 
+ For example, with a value of "1h", the node must match the status + for at least 1 hour before being considered unhealthy. + type: string + type: + description: type of Node condition + minLength: 1 + type: string + required: + - status + - timeout + - type + type: object + maxItems: 100 + type: array unhealthyNodeConditions: description: |- unhealthyNodeConditions contains a list of conditions that determine @@ -4032,6 +4066,40 @@ spec: type: string type: object x-kubernetes-map-type: atomic + unhealthyMachineConditions: + description: |- + unhealthyMachineConditions contains a list of the machine conditions that determine + whether a node is considered unhealthy. The conditions are combined in a + logical OR, i.e. if any of the conditions is met, the node is unhealthy. + items: + description: |- + UnhealthyMachineCondition represents a Node condition type and value with a timeout + specified as a duration. When the named condition has been in the given + status for at least the timeout value, a node is considered unhealthy. + properties: + status: + description: status of the condition, one of True, + False, Unknown. + minLength: 1 + type: string + timeout: + description: |- + timeout is the duration that a node must be in a given status for, + after which the node is considered unhealthy. + For example, with a value of "1h", the node must match the status + for at least 1 hour before being considered unhealthy. 
+ type: string + type: + description: type of Node condition + minLength: 1 + type: string + required: + - status + - timeout + - type + type: object + maxItems: 100 + type: array unhealthyNodeConditions: description: |- unhealthyNodeConditions contains a list of conditions that determine diff --git a/config/crd/bases/cluster.x-k8s.io_clusters.yaml b/config/crd/bases/cluster.x-k8s.io_clusters.yaml index d57e620ddee6..4705cb6fd63d 100644 --- a/config/crd/bases/cluster.x-k8s.io_clusters.yaml +++ b/config/crd/bases/cluster.x-k8s.io_clusters.yaml @@ -2510,6 +2510,40 @@ spec: type: string type: object x-kubernetes-map-type: atomic + unhealthyMachineConditions: + description: |- + unhealthyMachineConditions contains a list of the machine conditions that determine + whether a node is considered unhealthy. The conditions are combined in a + logical OR, i.e. if any of the conditions is met, the node is unhealthy. + items: + description: |- + UnhealthyMachineCondition represents a Node condition type and value with a timeout + specified as a duration. When the named condition has been in the given + status for at least the timeout value, a node is considered unhealthy. + properties: + status: + description: status of the condition, one of True, + False, Unknown. + minLength: 1 + type: string + timeout: + description: |- + timeout is the duration that a node must be in a given status for, + after which the node is considered unhealthy. + For example, with a value of "1h", the node must match the status + for at least 1 hour before being considered unhealthy. 
+ type: string + type: + description: type of Node condition + minLength: 1 + type: string + required: + - status + - timeout + - type + type: object + maxItems: 100 + type: array unhealthyNodeConditions: description: |- unhealthyNodeConditions contains a list of conditions that determine @@ -2847,6 +2881,40 @@ spec: type: string type: object x-kubernetes-map-type: atomic + unhealthyMachineConditions: + description: |- + unhealthyMachineConditions contains a list of the machine conditions that determine + whether a node is considered unhealthy. The conditions are combined in a + logical OR, i.e. if any of the conditions is met, the node is unhealthy. + items: + description: |- + UnhealthyMachineCondition represents a Node condition type and value with a timeout + specified as a duration. When the named condition has been in the given + status for at least the timeout value, a node is considered unhealthy. + properties: + status: + description: status of the condition, one + of True, False, Unknown. + minLength: 1 + type: string + timeout: + description: |- + timeout is the duration that a node must be in a given status for, + after which the node is considered unhealthy. + For example, with a value of "1h", the node must match the status + for at least 1 hour before being considered unhealthy. 
+ type: string + type: + description: type of Node condition + minLength: 1 + type: string + required: + - status + - timeout + - type + type: object + maxItems: 100 + type: array unhealthyNodeConditions: description: |- unhealthyNodeConditions contains a list of conditions that determine diff --git a/config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml b/config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml index 78e1b0a734fb..52bb9fa35597 100644 --- a/config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml +++ b/config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml @@ -209,39 +209,6 @@ spec: type: object minItems: 1 type: array - unhealthyMachineConditions: - description: |- - unhealthyMachineConditions contains a list of the machine conditions that determine - whether a node is considered unhealthy. The conditions are combined in a - logical OR, i.e. if any of the conditions is met, the node is unhealthy. - items: - description: |- - UnhealthyMachineCondition represents a Node condition type and value with a timeout - specified as a duration. When the named condition has been in the given - status for at least the timeout value, a node is considered unhealthy. - properties: - status: - description: status of the condition, one of True, False, Unknown. - minLength: 1 - type: string - timeout: - description: |- - timeout is the duration that a node must be in a given status for, - after which the node is considered unhealthy. - For example, with a value of "1h", the node must match the status - for at least 1 hour before being considered unhealthy. 
- type: string - type: - description: type of Node condition - minLength: 1 - type: string - required: - - status - - timeout - - type - type: object - maxItems: 100 - type: array required: - clusterName - selector @@ -531,39 +498,6 @@ spec: type: object minItems: 1 type: array - unhealthyMachineConditions: - description: |- - unhealthyMachineConditions contains a list of the machine conditions that determine - whether a node is considered unhealthy. The conditions are combined in a - logical OR, i.e. if any of the conditions is met, the node is unhealthy. - items: - description: |- - UnhealthyMachineCondition represents a Node condition type and value with a timeout - specified as a duration. When the named condition has been in the given - status for at least the timeout value, a node is considered unhealthy. - properties: - status: - description: status of the condition, one of True, False, Unknown. - minLength: 1 - type: string - timeout: - description: |- - timeout is the duration that a node must be in a given status for, - after which the node is considered unhealthy. - For example, with a value of "1h", the node must match the status - for at least 1 hour before being considered unhealthy. - type: string - type: - description: type of Node condition - minLength: 1 - type: string - required: - - status - - timeout - - type - type: object - maxItems: 100 - type: array unhealthyRange: description: |- unhealthyRange specifies the range of unhealthy machines allowed. @@ -872,39 +806,6 @@ spec: type: object maxItems: 100 type: array - unhealthyMachineConditions: - description: |- - unhealthyMachineConditions contains a list of the machine conditions that determine - whether a node is considered unhealthy. The conditions are combined in a - logical OR, i.e. if any of the conditions is met, the node is unhealthy. - items: - description: |- - UnhealthyMachineCondition represents a Node condition type and value with a timeout - specified as a duration. 
When the named condition has been in the given - status for at least the timeout value, a node is considered unhealthy. - properties: - status: - description: status of the condition, one of True, False, Unknown. - minLength: 1 - type: string - timeout: - description: |- - timeout is the duration that a node must be in a given status for, - after which the node is considered unhealthy. - For example, with a value of "1h", the node must match the status - for at least 1 hour before being considered unhealthy. - type: string - type: - description: type of Node condition - minLength: 1 - type: string - required: - - status - - timeout - - type - type: object - maxItems: 100 - type: array unhealthyRange: description: |- unhealthyRange specifies the range of unhealthy machines allowed. diff --git a/exp/topology/desiredstate/desired_state.go b/exp/topology/desiredstate/desired_state.go index 97b3cca45a04..8f7e441a4158 100644 --- a/exp/topology/desiredstate/desired_state.go +++ b/exp/topology/desiredstate/desired_state.go @@ -1424,13 +1424,14 @@ func computeMachineHealthCheck(ctx context.Context, healthCheckTarget client.Obj }, }, Spec: clusterv1.MachineHealthCheckSpec{ - ClusterName: cluster.Name, - Selector: *selector, - UnhealthyNodeConditions: check.UnhealthyNodeConditions, - MaxUnhealthy: check.MaxUnhealthy, - UnhealthyRange: check.UnhealthyRange, - NodeStartupTimeout: check.NodeStartupTimeout, - RemediationTemplate: check.RemediationTemplate, + ClusterName: cluster.Name, + Selector: *selector, + UnhealthyNodeConditions: check.UnhealthyNodeConditions, + UnhealthyMachineConditions: check.UnhealthyMachineConditions, + MaxUnhealthy: check.MaxUnhealthy, + UnhealthyRange: check.UnhealthyRange, + NodeStartupTimeout: check.NodeStartupTimeout, + RemediationTemplate: check.RemediationTemplate, }, } diff --git a/exp/topology/desiredstate/desired_state_test.go b/exp/topology/desiredstate/desired_state_test.go index 7a229320da41..1a70e92d784f 100644 --- 
a/exp/topology/desiredstate/desired_state_test.go +++ b/exp/topology/desiredstate/desired_state_test.go @@ -36,6 +36,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client/fake" + controlplanev1 "sigs.k8s.io/cluster-api/api/controlplane/kubeadm/v1beta2" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" runtimehooksv1 "sigs.k8s.io/cluster-api/api/runtime/hooks/v1alpha1" runtimev1 "sigs.k8s.io/cluster-api/api/runtime/v1beta2" @@ -1430,6 +1431,15 @@ func TestComputeMachineDeployment(t *testing.T) { Timeout: metav1.Duration{Duration: 5 * time.Minute}, }, } + unhealthyMachineConditions := + []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + } + nodeTimeoutDuration := &metav1.Duration{Duration: time.Duration(1)} clusterClassFailureDomain := "A" @@ -1450,8 +1460,9 @@ func TestComputeMachineDeployment(t *testing.T) { WithInfrastructureTemplate(workerInfrastructureMachineTemplate). WithBootstrapTemplate(workerBootstrapTemplate). WithMachineHealthCheckClass(&clusterv1.MachineHealthCheckClass{ - UnhealthyNodeConditions: unhealthyNodeConditions, - NodeStartupTimeout: nodeTimeoutDuration, + UnhealthyNodeConditions: unhealthyNodeConditions, + UnhealthyMachineConditions: unhealthyMachineConditions, + NodeStartupTimeout: nodeTimeoutDuration, }). WithReadinessGates(clusterClassReadinessGates). WithFailureDomain(&clusterClassFailureDomain). 
@@ -1491,7 +1502,8 @@ func TestComputeMachineDeployment(t *testing.T) { BootstrapTemplate: workerBootstrapTemplate, InfrastructureMachineTemplate: workerInfrastructureMachineTemplate, MachineHealthCheck: &clusterv1.MachineHealthCheckClass{ - UnhealthyNodeConditions: unhealthyNodeConditions, + UnhealthyNodeConditions: unhealthyNodeConditions, + UnhealthyMachineConditions: unhealthyMachineConditions, NodeStartupTimeout: &metav1.Duration{ Duration: time.Duration(1), }, @@ -1651,7 +1663,8 @@ func TestComputeMachineDeployment(t *testing.T) { BootstrapTemplate: workerBootstrapTemplate, InfrastructureMachineTemplate: workerInfrastructureMachineTemplate, MachineHealthCheck: &clusterv1.MachineHealthCheckClass{ - UnhealthyNodeConditions: unhealthyNodeConditions, + UnhealthyNodeConditions: unhealthyNodeConditions, + UnhealthyMachineConditions: unhealthyMachineConditions, NodeStartupTimeout: &metav1.Duration{ Duration: time.Duration(1), }, @@ -1913,6 +1926,9 @@ func TestComputeMachineDeployment(t *testing.T) { // Check that UnhealthyNodeConditions are set as expected. g.Expect(actual.MachineHealthCheck.Spec.UnhealthyNodeConditions).To(BeComparableTo(unhealthyNodeConditions)) + + // Check that UnhealthyMachineConditions are set as expected. 
+ g.Expect(actual.MachineHealthCheck.Spec.UnhealthyMachineConditions).To(BeComparableTo(unhealthyMachineConditions)) }) } @@ -3055,6 +3071,13 @@ func Test_computeMachineHealthCheck(t *testing.T) { Timeout: metav1.Duration{Duration: 5 * time.Minute}, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, NodeStartupTimeout: &metav1.Duration{ Duration: time.Duration(1), }, @@ -3100,6 +3123,13 @@ func Test_computeMachineHealthCheck(t *testing.T) { Timeout: metav1.Duration{Duration: 5 * time.Minute}, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, NodeStartupTimeout: &metav1.Duration{ Duration: time.Duration(1), }, diff --git a/internal/apis/core/v1alpha3/conversion_test.go b/internal/apis/core/v1alpha3/conversion_test.go index 1b7f134f1a4d..21af00ce6a1e 100644 --- a/internal/apis/core/v1alpha3/conversion_test.go +++ b/internal/apis/core/v1alpha3/conversion_test.go @@ -236,12 +236,23 @@ func hubClusterVariable(in *clusterv1.ClusterVariable, c randfill.Continue) { func MachineHealthCheckFuzzFunc(_ runtimeserializer.CodecFactory) []interface{} { return []interface{}{ + hubMachineHealthCheckSpec, hubMachineHealthCheckStatus, } } +func hubMachineHealthCheckSpec(in *clusterv1.MachineHealthCheckSpec, c randfill.Continue) { + c.FillNoCustom(in) + + // Drop UnhealthyMachineConditions as it does not exist in v1alpha3. + if in.UnhealthyMachineConditions != nil { + in.UnhealthyMachineConditions = nil + } +} + func hubMachineHealthCheckStatus(in *clusterv1.MachineHealthCheckStatus, c randfill.Continue) { c.FillNoCustom(in) + // Drop empty structs with only omit empty fields. 
if in.Deprecated != nil { if in.Deprecated.V1Beta1 == nil || reflect.DeepEqual(in.Deprecated.V1Beta1, &clusterv1.MachineHealthCheckV1Beta1DeprecatedStatus{}) { diff --git a/internal/apis/core/v1alpha3/machinehealthcheck_types.go b/internal/apis/core/v1alpha3/machinehealthcheck_types.go index d4b6a63e06d6..ee583d6a68b9 100644 --- a/internal/apis/core/v1alpha3/machinehealthcheck_types.go +++ b/internal/apis/core/v1alpha3/machinehealthcheck_types.go @@ -40,14 +40,6 @@ type MachineHealthCheckSpec struct { // +kubebuilder:validation:MinItems=1 UnhealthyConditions []UnhealthyCondition `json:"unhealthyConditions"` - // unhealthyMachineConditions contains a list of the machine conditions that determine - // whether a node is considered unhealthy. The conditions are combined in a - // logical OR, i.e. if any of the conditions is met, the node is unhealthy. - // - // +optional - // +kubebuilder:validation:MaxItems=100 - UnhealthyMachineConditions []UnhealthyMachineCondition `json:"unhealthyMachineConditions,omitempty"` - // maxUnhealthy specifies the maximum number of unhealthy machines allowed. // Any further remediation is only allowed if at most "maxUnhealthy" machines selected by // "selector" are not healthy. @@ -96,34 +88,6 @@ type UnhealthyCondition struct { // ANCHOR_END: UnhealthyCondition -// ANCHOR: UnhealthyMachineCondition - -// UnhealthyMachineCondition represents a Node condition type and value with a timeout -// specified as a duration. When the named condition has been in the given -// status for at least the timeout value, a node is considered unhealthy. -type UnhealthyMachineCondition struct { - // type of Node condition - // +kubebuilder:validation:Type=string - // +kubebuilder:validation:MinLength=1 - // +required - Type string `json:"type"` - - // status of the condition, one of True, False, Unknown. 
- // +kubebuilder:validation:Type=string - // +kubebuilder:validation:MinLength=1 - // +required - Status metav1.ConditionStatus `json:"status"` - - // timeout is the duration that a node must be in a given status for, - // after which the node is considered unhealthy. - // For example, with a value of "1h", the node must match the status - // for at least 1 hour before being considered unhealthy. - // +required - Timeout metav1.Duration `json:"timeout"` -} - -// ANCHOR_END: UnhealthyMachineCondition - // ANCHOR: MachineHealthCheckStatus // MachineHealthCheckStatus defines the observed state of MachineHealthCheck. diff --git a/internal/apis/core/v1alpha3/zz_generated.conversion.go b/internal/apis/core/v1alpha3/zz_generated.conversion.go index f2d90844fa30..7eccfeeade9f 100644 --- a/internal/apis/core/v1alpha3/zz_generated.conversion.go +++ b/internal/apis/core/v1alpha3/zz_generated.conversion.go @@ -259,16 +259,6 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } - if err := s.AddGeneratedConversionFunc((*UnhealthyMachineCondition)(nil), (*v1beta2.UnhealthyMachineCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha3_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(a.(*UnhealthyMachineCondition), b.(*v1beta2.UnhealthyMachineCondition), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*v1beta2.UnhealthyMachineCondition)(nil), (*UnhealthyMachineCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1beta2_UnhealthyMachineCondition_To_v1alpha3_UnhealthyMachineCondition(a.(*v1beta2.UnhealthyMachineCondition), b.(*UnhealthyMachineCondition), scope) - }); err != nil { - return err - } if err := s.AddConversionFunc((*v1.Condition)(nil), (*Condition)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1_Condition_To_v1alpha3_Condition(a.(*v1.Condition), b.(*Condition), scope) }); err != 
nil { @@ -1001,7 +991,6 @@ func autoConvert_v1alpha3_MachineHealthCheckSpec_To_v1beta2_MachineHealthCheckSp out.ClusterName = in.ClusterName out.Selector = in.Selector // WARNING: in.UnhealthyConditions requires manual conversion: does not exist in peer-type - out.UnhealthyMachineConditions = *(*[]v1beta2.UnhealthyMachineCondition)(unsafe.Pointer(&in.UnhealthyMachineConditions)) out.MaxUnhealthy = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnhealthy)) out.NodeStartupTimeout = (*v1.Duration)(unsafe.Pointer(in.NodeStartupTimeout)) out.RemediationTemplate = (*corev1.ObjectReference)(unsafe.Pointer(in.RemediationTemplate)) @@ -1012,7 +1001,7 @@ func autoConvert_v1beta2_MachineHealthCheckSpec_To_v1alpha3_MachineHealthCheckSp out.ClusterName = in.ClusterName out.Selector = in.Selector // WARNING: in.UnhealthyNodeConditions requires manual conversion: does not exist in peer-type - out.UnhealthyMachineConditions = *(*[]UnhealthyMachineCondition)(unsafe.Pointer(&in.UnhealthyMachineConditions)) + // WARNING: in.UnhealthyMachineConditions requires manual conversion: does not exist in peer-type out.MaxUnhealthy = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnhealthy)) // WARNING: in.UnhealthyRange requires manual conversion: does not exist in peer-type out.NodeStartupTimeout = (*v1.Duration)(unsafe.Pointer(in.NodeStartupTimeout)) @@ -1570,27 +1559,3 @@ func autoConvert_v1beta2_ObjectMeta_To_v1alpha3_ObjectMeta(in *v1beta2.ObjectMet func Convert_v1beta2_ObjectMeta_To_v1alpha3_ObjectMeta(in *v1beta2.ObjectMeta, out *ObjectMeta, s conversion.Scope) error { return autoConvert_v1beta2_ObjectMeta_To_v1alpha3_ObjectMeta(in, out, s) } - -func autoConvert_v1alpha3_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in *UnhealthyMachineCondition, out *v1beta2.UnhealthyMachineCondition, s conversion.Scope) error { - out.Type = in.Type - out.Status = v1.ConditionStatus(in.Status) - out.Timeout = in.Timeout - return nil -} - -// 
Convert_v1alpha3_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition is an autogenerated conversion function. -func Convert_v1alpha3_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in *UnhealthyMachineCondition, out *v1beta2.UnhealthyMachineCondition, s conversion.Scope) error { - return autoConvert_v1alpha3_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in, out, s) -} - -func autoConvert_v1beta2_UnhealthyMachineCondition_To_v1alpha3_UnhealthyMachineCondition(in *v1beta2.UnhealthyMachineCondition, out *UnhealthyMachineCondition, s conversion.Scope) error { - out.Type = in.Type - out.Status = v1.ConditionStatus(in.Status) - out.Timeout = in.Timeout - return nil -} - -// Convert_v1beta2_UnhealthyMachineCondition_To_v1alpha3_UnhealthyMachineCondition is an autogenerated conversion function. -func Convert_v1beta2_UnhealthyMachineCondition_To_v1alpha3_UnhealthyMachineCondition(in *v1beta2.UnhealthyMachineCondition, out *UnhealthyMachineCondition, s conversion.Scope) error { - return autoConvert_v1beta2_UnhealthyMachineCondition_To_v1alpha3_UnhealthyMachineCondition(in, out, s) -} diff --git a/internal/apis/core/v1alpha3/zz_generated.deepcopy.go b/internal/apis/core/v1alpha3/zz_generated.deepcopy.go index e5744e3542bc..ac0ffaa9f4c5 100644 --- a/internal/apis/core/v1alpha3/zz_generated.deepcopy.go +++ b/internal/apis/core/v1alpha3/zz_generated.deepcopy.go @@ -577,11 +577,6 @@ func (in *MachineHealthCheckSpec) DeepCopyInto(out *MachineHealthCheckSpec) { *out = make([]UnhealthyCondition, len(*in)) copy(*out, *in) } - if in.UnhealthyMachineConditions != nil { - in, out := &in.UnhealthyMachineConditions, &out.UnhealthyMachineConditions - *out = make([]UnhealthyMachineCondition, len(*in)) - copy(*out, *in) - } if in.MaxUnhealthy != nil { in, out := &in.MaxUnhealthy, &out.MaxUnhealthy *out = new(intstr.IntOrString) @@ -1112,19 +1107,3 @@ func (in *UnhealthyCondition) DeepCopy() *UnhealthyCondition { in.DeepCopyInto(out) return out } - 
-// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *UnhealthyMachineCondition) DeepCopyInto(out *UnhealthyMachineCondition) { - *out = *in - out.Timeout = in.Timeout -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UnhealthyMachineCondition. -func (in *UnhealthyMachineCondition) DeepCopy() *UnhealthyMachineCondition { - if in == nil { - return nil - } - out := new(UnhealthyMachineCondition) - in.DeepCopyInto(out) - return out -} diff --git a/internal/apis/core/v1alpha4/conversion_test.go b/internal/apis/core/v1alpha4/conversion_test.go index 76b46fd13dd7..6dcc2d3b50be 100644 --- a/internal/apis/core/v1alpha4/conversion_test.go +++ b/internal/apis/core/v1alpha4/conversion_test.go @@ -255,12 +255,23 @@ func spokeMachineDeploymentSpec(in *MachineDeploymentSpec, c randfill.Continue) func MachineHealthCheckFuzzFunc(_ runtimeserializer.CodecFactory) []interface{} { return []interface{}{ + hubMachineHealthCheckSpec, hubMachineHealthCheckStatus, } } +func hubMachineHealthCheckSpec(in *clusterv1.MachineHealthCheckSpec, c randfill.Continue) { + c.FillNoCustom(in) + + // Drop UnhealthyMachineConditions as it does not exist in v1alpha4. + if in.UnhealthyMachineConditions != nil { + in.UnhealthyMachineConditions = nil + } +} + func hubMachineHealthCheckStatus(in *clusterv1.MachineHealthCheckStatus, c randfill.Continue) { c.FillNoCustom(in) + // Drop empty structs with only omit empty fields. 
if in.Deprecated != nil { if in.Deprecated.V1Beta1 == nil || reflect.DeepEqual(in.Deprecated.V1Beta1, &clusterv1.MachineHealthCheckV1Beta1DeprecatedStatus{}) { diff --git a/internal/apis/core/v1alpha4/machinehealthcheck_types.go b/internal/apis/core/v1alpha4/machinehealthcheck_types.go index eaba479c67b7..955de61446d1 100644 --- a/internal/apis/core/v1alpha4/machinehealthcheck_types.go +++ b/internal/apis/core/v1alpha4/machinehealthcheck_types.go @@ -40,14 +40,6 @@ type MachineHealthCheckSpec struct { // +kubebuilder:validation:MinItems=1 UnhealthyConditions []UnhealthyCondition `json:"unhealthyConditions"` - // unhealthyMachineConditions contains a list of the machine conditions that determine - // whether a node is considered unhealthy. The conditions are combined in a - // logical OR, i.e. if any of the conditions is met, the node is unhealthy. - // - // +optional - // +kubebuilder:validation:MaxItems=100 - UnhealthyMachineConditions []UnhealthyMachineCondition `json:"unhealthyMachineConditions,omitempty"` - // maxUnhealthy specifies the maximum number of unhealthy machines allowed. // Any further remediation is only allowed if at most "maxUnhealthy" machines selected by // "selector" are not healthy. @@ -108,34 +100,6 @@ type UnhealthyCondition struct { // ANCHOR_END: UnhealthyCondition -// ANCHOR: UnhealthyMachineCondition - -// UnhealthyMachineCondition represents a Node condition type and value with a timeout -// specified as a duration. When the named condition has been in the given -// status for at least the timeout value, a node is considered unhealthy. -type UnhealthyMachineCondition struct { - // type of Node condition - // +kubebuilder:validation:Type=string - // +kubebuilder:validation:MinLength=1 - // +required - Type string `json:"type"` - - // status of the condition, one of True, False, Unknown. 
- // +kubebuilder:validation:Type=string - // +kubebuilder:validation:MinLength=1 - // +required - Status metav1.ConditionStatus `json:"status"` - - // timeout is the duration that a node must be in a given status for, - // after which the node is considered unhealthy. - // For example, with a value of "1h", the node must match the status - // for at least 1 hour before being considered unhealthy. - // +required - Timeout metav1.Duration `json:"timeout"` -} - -// ANCHOR_END: UnhealthyMachineCondition - // ANCHOR: MachineHealthCheckStatus // MachineHealthCheckStatus defines the observed state of MachineHealthCheck. diff --git a/internal/apis/core/v1alpha4/zz_generated.conversion.go b/internal/apis/core/v1alpha4/zz_generated.conversion.go index f20879ee18b2..9a8b98f4babb 100644 --- a/internal/apis/core/v1alpha4/zz_generated.conversion.go +++ b/internal/apis/core/v1alpha4/zz_generated.conversion.go @@ -344,16 +344,6 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } - if err := s.AddGeneratedConversionFunc((*UnhealthyMachineCondition)(nil), (*v1beta2.UnhealthyMachineCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1alpha4_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(a.(*UnhealthyMachineCondition), b.(*v1beta2.UnhealthyMachineCondition), scope) - }); err != nil { - return err - } - if err := s.AddGeneratedConversionFunc((*v1beta2.UnhealthyMachineCondition)(nil), (*UnhealthyMachineCondition)(nil), func(a, b interface{}, scope conversion.Scope) error { - return Convert_v1beta2_UnhealthyMachineCondition_To_v1alpha4_UnhealthyMachineCondition(a.(*v1beta2.UnhealthyMachineCondition), b.(*UnhealthyMachineCondition), scope) - }); err != nil { - return err - } if err := s.AddGeneratedConversionFunc((*WorkersClass)(nil), (*v1beta2.WorkersClass)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha4_WorkersClass_To_v1beta2_WorkersClass(a.(*WorkersClass), 
b.(*v1beta2.WorkersClass), scope) }); err != nil { @@ -1434,7 +1424,6 @@ func autoConvert_v1alpha4_MachineHealthCheckSpec_To_v1beta2_MachineHealthCheckSp out.ClusterName = in.ClusterName out.Selector = in.Selector // WARNING: in.UnhealthyConditions requires manual conversion: does not exist in peer-type - out.UnhealthyMachineConditions = *(*[]v1beta2.UnhealthyMachineCondition)(unsafe.Pointer(&in.UnhealthyMachineConditions)) out.MaxUnhealthy = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnhealthy)) out.UnhealthyRange = (*string)(unsafe.Pointer(in.UnhealthyRange)) out.NodeStartupTimeout = (*v1.Duration)(unsafe.Pointer(in.NodeStartupTimeout)) @@ -1446,7 +1435,7 @@ func autoConvert_v1beta2_MachineHealthCheckSpec_To_v1alpha4_MachineHealthCheckSp out.ClusterName = in.ClusterName out.Selector = in.Selector // WARNING: in.UnhealthyNodeConditions requires manual conversion: does not exist in peer-type - out.UnhealthyMachineConditions = *(*[]UnhealthyMachineCondition)(unsafe.Pointer(&in.UnhealthyMachineConditions)) + // WARNING: in.UnhealthyMachineConditions requires manual conversion: does not exist in peer-type out.MaxUnhealthy = (*intstr.IntOrString)(unsafe.Pointer(in.MaxUnhealthy)) out.UnhealthyRange = (*string)(unsafe.Pointer(in.UnhealthyRange)) out.NodeStartupTimeout = (*v1.Duration)(unsafe.Pointer(in.NodeStartupTimeout)) @@ -2081,30 +2070,6 @@ func autoConvert_v1beta2_Topology_To_v1alpha4_Topology(in *v1beta2.Topology, out return nil } -func autoConvert_v1alpha4_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in *UnhealthyMachineCondition, out *v1beta2.UnhealthyMachineCondition, s conversion.Scope) error { - out.Type = in.Type - out.Status = v1.ConditionStatus(in.Status) - out.Timeout = in.Timeout - return nil -} - -// Convert_v1alpha4_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition is an autogenerated conversion function. 
-func Convert_v1alpha4_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in *UnhealthyMachineCondition, out *v1beta2.UnhealthyMachineCondition, s conversion.Scope) error { - return autoConvert_v1alpha4_UnhealthyMachineCondition_To_v1beta2_UnhealthyMachineCondition(in, out, s) -} - -func autoConvert_v1beta2_UnhealthyMachineCondition_To_v1alpha4_UnhealthyMachineCondition(in *v1beta2.UnhealthyMachineCondition, out *UnhealthyMachineCondition, s conversion.Scope) error { - out.Type = in.Type - out.Status = v1.ConditionStatus(in.Status) - out.Timeout = in.Timeout - return nil -} - -// Convert_v1beta2_UnhealthyMachineCondition_To_v1alpha4_UnhealthyMachineCondition is an autogenerated conversion function. -func Convert_v1beta2_UnhealthyMachineCondition_To_v1alpha4_UnhealthyMachineCondition(in *v1beta2.UnhealthyMachineCondition, out *UnhealthyMachineCondition, s conversion.Scope) error { - return autoConvert_v1beta2_UnhealthyMachineCondition_To_v1alpha4_UnhealthyMachineCondition(in, out, s) -} - func autoConvert_v1alpha4_WorkersClass_To_v1beta2_WorkersClass(in *WorkersClass, out *v1beta2.WorkersClass, s conversion.Scope) error { if in.MachineDeployments != nil { in, out := &in.MachineDeployments, &out.MachineDeployments diff --git a/internal/apis/core/v1alpha4/zz_generated.deepcopy.go b/internal/apis/core/v1alpha4/zz_generated.deepcopy.go index f3894ed965c9..6d3e29cb0b60 100644 --- a/internal/apis/core/v1alpha4/zz_generated.deepcopy.go +++ b/internal/apis/core/v1alpha4/zz_generated.deepcopy.go @@ -778,11 +778,6 @@ func (in *MachineHealthCheckSpec) DeepCopyInto(out *MachineHealthCheckSpec) { *out = make([]UnhealthyCondition, len(*in)) copy(*out, *in) } - if in.UnhealthyMachineConditions != nil { - in, out := &in.UnhealthyMachineConditions, &out.UnhealthyMachineConditions - *out = make([]UnhealthyMachineCondition, len(*in)) - copy(*out, *in) - } if in.MaxUnhealthy != nil { in, out := &in.MaxUnhealthy, &out.MaxUnhealthy *out = new(intstr.IntOrString) @@ -1349,22 
+1344,6 @@ func (in *UnhealthyCondition) DeepCopy() *UnhealthyCondition { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *UnhealthyMachineCondition) DeepCopyInto(out *UnhealthyMachineCondition) { - *out = *in - out.Timeout = in.Timeout -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new UnhealthyMachineCondition. -func (in *UnhealthyMachineCondition) DeepCopy() *UnhealthyMachineCondition { - if in == nil { - return nil - } - out := new(UnhealthyMachineCondition) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *WorkersClass) DeepCopyInto(out *WorkersClass) { *out = *in diff --git a/internal/controllers/machinehealthcheck/machinehealthcheck_controller_test.go b/internal/controllers/machinehealthcheck/machinehealthcheck_controller_test.go index 19eaad11c2db..509a0d197ad6 100644 --- a/internal/controllers/machinehealthcheck/machinehealthcheck_controller_test.go +++ b/internal/controllers/machinehealthcheck/machinehealthcheck_controller_test.go @@ -41,6 +41,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + controlplanev1 "sigs.k8s.io/cluster-api/api/controlplane/kubeadm/v1beta2" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" "sigs.k8s.io/cluster-api/api/core/v1beta2/index" "sigs.k8s.io/cluster-api/controllers/clustercache" @@ -1151,12 +1152,13 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { assertMachinesOwnerRemediated(g, mhc, 1) }) - t.Run("Machine's Node without conditions", func(t *testing.T) { + t.Run("Machine's Node and Machine without conditions", func(t *testing.T) { g := NewWithT(t) cluster := createCluster(g, ns.Name) mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name) mhc.Spec.UnhealthyNodeConditions = nil + 
mhc.Spec.UnhealthyMachineConditions = nil g.Expect(env.Create(ctx, mhc)).To(Succeed()) defer func(do ...client.Object) { @@ -1323,6 +1325,115 @@ func TestMachineHealthCheck_Reconcile(t *testing.T) { assertMachinesOwnerRemediated(g, mhc, 1) }) + t.Run("should react when a Machine transitions to unhealthy", func(t *testing.T) { + g := NewWithT(t) + cluster := createCluster(g, ns.Name) + + mhc := newMachineHealthCheck(cluster.Namespace, cluster.Name) + + g.Expect(env.Create(ctx, mhc)).To(Succeed()) + defer func(do ...client.Object) { + g.Expect(env.Cleanup(ctx, do...)).To(Succeed()) + }(cluster, mhc) + + // Healthy nodes and machines. + _, machines, cleanup := createMachinesWithNodes(g, cluster, + count(1), + firstMachineAsControlPlane(), + createNodeRefForMachine(true), + nodeStatus(corev1.ConditionTrue), + machineLabels(mhc.Spec.Selector.MatchLabels), + ) + defer cleanup() + targetMachines := make([]string, len(machines)) + for i, m := range machines { + targetMachines[i] = m.Name + } + sort.Strings(targetMachines) + + // Make sure the status matches. 
+ g.Eventually(func() *clusterv1.MachineHealthCheckStatus { + err := env.Get(ctx, util.ObjectKey(mhc), mhc) + if err != nil { + return nil + } + return &mhc.Status + }).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{ + ExpectedMachines: 1, + CurrentHealthy: 1, + RemediationsAllowed: 1, + ObservedGeneration: 1, + Targets: targetMachines, + Deprecated: &clusterv1.MachineHealthCheckDeprecatedStatus{ + V1Beta1: &clusterv1.MachineHealthCheckV1Beta1DeprecatedStatus{ + Conditions: clusterv1.Conditions{ + { + Type: clusterv1.RemediationAllowedV1Beta1Condition, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineHealthCheckRemediationAllowedCondition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineHealthCheckRemediationAllowedReason, + }, + }, + })) + + assertMachinesNotHealthy(g, mhc, 0) + assertMachinesOwnerRemediated(g, mhc, 0) + + // Transition the machine to unhealthy. + machine := machines[0] + machinePatch := client.MergeFrom(machine.DeepCopy()) + machine.Status.Conditions = []metav1.Condition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodFailedReason, + LastTransitionTime: metav1.NewTime(time.Now().Add(-10 * time.Minute)), + }, + } + g.Expect(env.Status().Patch(ctx, machine, machinePatch)).To(Succeed()) + + // Make sure the status matches. 
+ g.Eventually(func() *clusterv1.MachineHealthCheckStatus { + err := env.Get(ctx, util.ObjectKey(mhc), mhc) + if err != nil { + return nil + } + return &mhc.Status + }).Should(MatchMachineHealthCheckStatus(&clusterv1.MachineHealthCheckStatus{ + ExpectedMachines: 1, + CurrentHealthy: 0, + ObservedGeneration: 1, + Targets: targetMachines, + Deprecated: &clusterv1.MachineHealthCheckDeprecatedStatus{ + V1Beta1: &clusterv1.MachineHealthCheckV1Beta1DeprecatedStatus{ + Conditions: clusterv1.Conditions{ + { + Type: clusterv1.RemediationAllowedV1Beta1Condition, + Status: corev1.ConditionTrue, + }, + }, + }, + }, + Conditions: []metav1.Condition{ + { + Type: clusterv1.MachineHealthCheckRemediationAllowedCondition, + Status: metav1.ConditionTrue, + Reason: clusterv1.MachineHealthCheckRemediationAllowedReason, + }, + }, + })) + + assertMachinesNotHealthy(g, mhc, 1) + assertMachinesOwnerRemediated(g, mhc, 1) + }) + t.Run("when in a MachineSet, unhealthy machines should be deleted", func(t *testing.T) { g := NewWithT(t) cluster := createCluster(g, ns.Name) @@ -2759,6 +2870,13 @@ func newMachineHealthCheck(namespace, clusterName string) *clusterv1.MachineHeal Timeout: metav1.Duration{Duration: 5 * time.Minute}, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, }, } } diff --git a/internal/controllers/topology/cluster/reconcile_state_test.go b/internal/controllers/topology/cluster/reconcile_state_test.go index 00ce08d65354..27687a11ff34 100644 --- a/internal/controllers/topology/cluster/reconcile_state_test.go +++ b/internal/controllers/topology/cluster/reconcile_state_test.go @@ -39,6 +39,7 @@ import ( . 
"sigs.k8s.io/controller-runtime/pkg/envtest/komega" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + controlplanev1 "sigs.k8s.io/cluster-api/api/controlplane/kubeadm/v1beta2" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" runtimehooksv1 "sigs.k8s.io/cluster-api/api/runtime/hooks/v1alpha1" runtimev1 "sigs.k8s.io/cluster-api/api/runtime/v1beta2" @@ -1739,6 +1740,13 @@ func TestReconcileControlPlaneMachineHealthCheck(t *testing.T) { Timeout: metav1.Duration{Duration: 5 * time.Minute}, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, } maxUnhealthy := intstr.Parse("45%") // Create clusterClasses requiring controlPlaneInfrastructure and one not. @@ -1758,6 +1766,7 @@ func TestReconcileControlPlaneMachineHealthCheck(t *testing.T) { mhcBuilder := builder.MachineHealthCheck(metav1.NamespaceDefault, "cp1"). WithSelector(*selectors.ForControlPlaneMHC()). WithUnhealthyNodeConditions(mhcClass.UnhealthyNodeConditions). + WithUnhealthyMachineConditions(mhcClass.UnhealthyMachineConditions). WithClusterName("cluster1") tests := []struct { @@ -3315,6 +3324,13 @@ func TestReconcileMachineDeploymentMachineHealthCheck(t *testing.T) { Timeout: metav1.Duration{Duration: 5 * time.Minute}, }, }). + WithUnhealthyMachineConditions([]clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }). WithClusterName("cluster1") infrastructureMachineTemplate := builder.TestInfrastructureMachineTemplate(metav1.NamespaceDefault, "infrastructure-machine-1").Build() @@ -3719,6 +3735,13 @@ func TestReconciler_reconcileMachineHealthCheck(t *testing.T) { Timeout: metav1.Duration{Duration: 5 * time.Minute}, }, }). 
+ WithUnhealthyMachineConditions([]clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }). WithClusterName("cluster1") tests := []struct { name string @@ -3743,6 +3766,12 @@ func TestReconciler_reconcileMachineHealthCheck(t *testing.T) { Status: corev1.ConditionUnknown, Timeout: metav1.Duration{Duration: 1000 * time.Minute}, }, + }).WithUnhealthyMachineConditions([]clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, }).Build(), want: mhcBuilder.DeepCopy().WithUnhealthyNodeConditions([]clusterv1.UnhealthyNodeCondition{ { @@ -3750,6 +3779,12 @@ func TestReconciler_reconcileMachineHealthCheck(t *testing.T) { Status: corev1.ConditionUnknown, Timeout: metav1.Duration{Duration: 1000 * time.Minute}, }, + }).WithUnhealthyMachineConditions([]clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, }).Build(), }, { diff --git a/internal/webhooks/clusterclass.go b/internal/webhooks/clusterclass.go index 888c2b6c6621..f8c8b7d8decb 100644 --- a/internal/webhooks/clusterclass.go +++ b/internal/webhooks/clusterclass.go @@ -523,11 +523,12 @@ func validateMachineHealthCheckClass(fldPath *field.Path, namepace string, m *cl Namespace: namepace, }, Spec: clusterv1.MachineHealthCheckSpec{ - NodeStartupTimeout: m.NodeStartupTimeout, - MaxUnhealthy: m.MaxUnhealthy, - UnhealthyNodeConditions: m.UnhealthyNodeConditions, - UnhealthyRange: m.UnhealthyRange, - RemediationTemplate: m.RemediationTemplate, + NodeStartupTimeout: m.NodeStartupTimeout, + MaxUnhealthy: m.MaxUnhealthy, + UnhealthyNodeConditions: m.UnhealthyNodeConditions, + 
UnhealthyMachineConditions: m.UnhealthyMachineConditions, + UnhealthyRange: m.UnhealthyRange, + RemediationTemplate: m.RemediationTemplate, }} return (&MachineHealthCheck{}).validateCommonFields(&mhc, fldPath) diff --git a/internal/webhooks/clusterclass_test.go b/internal/webhooks/clusterclass_test.go index 0fc4aa75da48..4d6936055f13 100644 --- a/internal/webhooks/clusterclass_test.go +++ b/internal/webhooks/clusterclass_test.go @@ -32,6 +32,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" + controlplanev1 "sigs.k8s.io/cluster-api/api/controlplane/kubeadm/v1beta2" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" "sigs.k8s.io/cluster-api/api/core/v1beta2/index" "sigs.k8s.io/cluster-api/feature" @@ -897,6 +898,13 @@ func TestClusterClassValidation(t *testing.T) { Timeout: metav1.Duration{Duration: 5 * time.Minute}, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, NodeStartupTimeout: &metav1.Duration{ Duration: time.Duration(6000000000000), }, @@ -921,7 +929,7 @@ func TestClusterClassValidation(t *testing.T) { expectErr: true, }, { - name: "create does not fail if ControlPlane MachineHealthCheck does not define UnhealthyNodeConditions", + name: "create does not fail if ControlPlane MachineHealthCheck does not define UnhealthyNodeConditions or UnhealthyMachineConditions", in: builder.ClusterClass(metav1.NamespaceDefault, "class1"). WithInfrastructureClusterTemplate( builder.InfrastructureClusterTemplate(metav1.NamespaceDefault, "infra1").Build()). 
@@ -961,6 +969,13 @@ func TestClusterClassValidation(t *testing.T) { Timeout: metav1.Duration{Duration: 5 * time.Minute}, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, NodeStartupTimeout: &metav1.Duration{ Duration: time.Duration(6000000000000), }, @@ -990,6 +1005,13 @@ func TestClusterClassValidation(t *testing.T) { Timeout: metav1.Duration{Duration: 5 * time.Minute}, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, NodeStartupTimeout: &metav1.Duration{ // nodeStartupTimeout is too short here - 600ns. Duration: time.Duration(600), @@ -1000,7 +1022,7 @@ func TestClusterClassValidation(t *testing.T) { expectErr: true, }, { - name: "create does not fail if MachineDeployment MachineHealthCheck does not define UnhealthyNodeConditions", + name: "create does not fail if MachineDeployment MachineHealthCheck does not define UnhealthyNodeConditions or UnhealthyMachineConditions", in: builder.ClusterClass(metav1.NamespaceDefault, "class1"). WithInfrastructureClusterTemplate( builder.InfrastructureClusterTemplate(metav1.NamespaceDefault, "infra1").Build()). @@ -2303,6 +2325,13 @@ func TestClusterClassValidationWithClusterAwareChecks(t *testing.T) { Timeout: metav1.Duration{Duration: 5 * time.Minute}, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, }). Build(), newClusterClass: builder.ClusterClass(metav1.NamespaceDefault, "clusterclass1"). 
@@ -2356,6 +2385,13 @@ func TestClusterClassValidationWithClusterAwareChecks(t *testing.T) { Timeout: metav1.Duration{Duration: 5 * time.Minute}, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, }, }). Build()). @@ -2489,6 +2525,13 @@ func TestClusterClassValidationWithClusterAwareChecks(t *testing.T) { Timeout: metav1.Duration{Duration: 5 * time.Minute}, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, }, }). Build()). diff --git a/internal/webhooks/machinehealthcheck_test.go b/internal/webhooks/machinehealthcheck_test.go index f7df68b28c89..be38da60cf12 100644 --- a/internal/webhooks/machinehealthcheck_test.go +++ b/internal/webhooks/machinehealthcheck_test.go @@ -25,6 +25,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + controlplanev1 "sigs.k8s.io/cluster-api/api/controlplane/kubeadm/v1beta2" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" "sigs.k8s.io/cluster-api/internal/webhooks/util" ) @@ -46,6 +47,13 @@ func TestMachineHealthCheckDefault(t *testing.T) { Status: corev1.ConditionFalse, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, }, } webhook := &MachineHealthCheck{} @@ -92,6 +100,13 @@ func TestMachineHealthCheckLabelSelectorAsSelectorValidation(t *testing.T) { Status: corev1.ConditionFalse, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: 
controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, }, } webhook := &MachineHealthCheck{} @@ -154,6 +169,13 @@ func TestMachineHealthCheckClusterNameImmutable(t *testing.T) { Status: corev1.ConditionFalse, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, }, } oldMHC := &clusterv1.MachineHealthCheck{ @@ -170,6 +192,13 @@ func TestMachineHealthCheckClusterNameImmutable(t *testing.T) { Status: corev1.ConditionFalse, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, }, } @@ -329,6 +358,70 @@ func TestMachineHealthCheckNodeStartupTimeout(t *testing.T) { } } +func TestMachineHealthCheckUnhealthyMachineConditions(t *testing.T) { + tests := []struct { + name string + unhealthyMachineConditions []clusterv1.UnhealthyMachineCondition + expectErr bool + }{ + { + name: "pass with correctly defined unhealthyMachineConditions", + unhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, + + expectErr: false, + }, + { + name: "do not fail if the UnhealthyMachineCondition array is nil", + unhealthyMachineConditions: nil, + expectErr: false, + }, + { + name: "do not fail if the UnhealthyMachineCondition array is empty", + unhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{}, + expectErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g :=
NewWithT(t) + mhc := &clusterv1.MachineHealthCheck{ + Spec: clusterv1.MachineHealthCheckSpec{ + Selector: metav1.LabelSelector{ + MatchLabels: map[string]string{ + "test": "test", + }, + }, + UnhealthyMachineConditions: tt.unhealthyMachineConditions, + }, + } + webhook := &MachineHealthCheck{} + + if tt.expectErr { + warnings, err := webhook.ValidateCreate(ctx, mhc) + g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) + warnings, err = webhook.ValidateUpdate(ctx, mhc, mhc) + g.Expect(err).To(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) + } else { + warnings, err := webhook.ValidateCreate(ctx, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) + warnings, err = webhook.ValidateUpdate(ctx, mhc, mhc) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(warnings).To(BeEmpty()) + } + }) + } +} + func TestMachineHealthCheckMaxUnhealthy(t *testing.T) { tests := []struct { name string @@ -375,6 +468,13 @@ func TestMachineHealthCheckMaxUnhealthy(t *testing.T) { Status: corev1.ConditionFalse, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, }, } webhook := &MachineHealthCheck{} @@ -407,6 +507,13 @@ func TestMachineHealthCheckSelectorValidation(t *testing.T) { Status: corev1.ConditionFalse, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, }, } webhook := &MachineHealthCheck{} @@ -433,6 +540,13 @@ func TestMachineHealthCheckClusterNameSelectorValidation(t *testing.T) { Status: corev1.ConditionFalse, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type:
controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, }, } webhook := &MachineHealthCheck{} @@ -461,6 +575,13 @@ func TestMachineHealthCheckRemediationTemplateNamespaceValidation(t *testing.T) Status: corev1.ConditionFalse, }, }, + UnhealthyMachineConditions: []clusterv1.UnhealthyMachineCondition{ + { + Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, + Status: metav1.ConditionFalse, + Timeout: metav1.Duration{Duration: 5 * time.Minute}, + }, + }, }, } invalid := valid.DeepCopy() diff --git a/util/test/builder/builders.go b/util/test/builder/builders.go index c1110c30bb6f..29a480de985f 100644 --- a/util/test/builder/builders.go +++ b/util/test/builder/builders.go @@ -2051,13 +2051,14 @@ func setStatusFields(obj *unstructured.Unstructured, fields map[string]interface // MachineHealthCheckBuilder holds fields for creating a MachineHealthCheck. type MachineHealthCheckBuilder struct { - name string - namespace string - ownerRefs []metav1.OwnerReference - selector metav1.LabelSelector - clusterName string - unhealthyNodeConditions []clusterv1.UnhealthyNodeCondition - maxUnhealthy *intstr.IntOrString + name string + namespace string + ownerRefs []metav1.OwnerReference + selector metav1.LabelSelector + clusterName string + unhealthyNodeConditions []clusterv1.UnhealthyNodeCondition + unhealthyMachineConditions []clusterv1.UnhealthyMachineCondition + maxUnhealthy *intstr.IntOrString } // MachineHealthCheck returns a MachineHealthCheckBuilder with the given name and namespace. @@ -2086,6 +2087,12 @@ func (m *MachineHealthCheckBuilder) WithUnhealthyNodeConditions(conditions []clu return m } +// WithUnhealthyMachineConditions sets the unhealthy machine conditions that Build places in the MachineHealthCheck spec.
+func (m *MachineHealthCheckBuilder) WithUnhealthyMachineConditions(conditions []clusterv1.UnhealthyMachineCondition) *MachineHealthCheckBuilder { + m.unhealthyMachineConditions = conditions + return m +} + // WithOwnerReferences adds ownerreferences for the MachineHealthCheck. func (m *MachineHealthCheckBuilder) WithOwnerReferences(ownerRefs []metav1.OwnerReference) *MachineHealthCheckBuilder { m.ownerRefs = ownerRefs @@ -2112,10 +2119,11 @@ func (m *MachineHealthCheckBuilder) Build() *clusterv1.MachineHealthCheck { OwnerReferences: m.ownerRefs, }, Spec: clusterv1.MachineHealthCheckSpec{ - ClusterName: m.clusterName, - Selector: m.selector, - UnhealthyNodeConditions: m.unhealthyNodeConditions, - MaxUnhealthy: m.maxUnhealthy, + ClusterName: m.clusterName, + Selector: m.selector, + UnhealthyNodeConditions: m.unhealthyNodeConditions, + UnhealthyMachineConditions: m.unhealthyMachineConditions, + MaxUnhealthy: m.maxUnhealthy, }, } if m.clusterName != "" { diff --git a/util/test/builder/zz_generated.deepcopy.go b/util/test/builder/zz_generated.deepcopy.go index 410c0fafb3b4..0e2302750ec9 100644 --- a/util/test/builder/zz_generated.deepcopy.go +++ b/util/test/builder/zz_generated.deepcopy.go @@ -628,6 +628,11 @@ func (in *MachineHealthCheckBuilder) DeepCopyInto(out *MachineHealthCheckBuilder *out = make([]v1beta2.UnhealthyNodeCondition, len(*in)) copy(*out, *in) } + if in.unhealthyMachineConditions != nil { + in, out := &in.unhealthyMachineConditions, &out.unhealthyMachineConditions + *out = make([]v1beta2.UnhealthyMachineCondition, len(*in)) + copy(*out, *in) + } if in.maxUnhealthy != nil { in, out := &in.maxUnhealthy, &out.maxUnhealthy *out = new(intstr.IntOrString) From 9515f138d07db8fd30a435d78044769ec4fcc0a1 Mon Sep 17 00:00:00 2001 From: Justin Miron Date: Sun, 25 May 2025 12:43:08 -0500 Subject: [PATCH 3/4] api lint fix --- api/core/v1beta2/machinehealthcheck_types.go | 4 ++-- .../crd/bases/cluster.x-k8s.io_clusterclasses.yaml | 12 ++++++++++--
config/crd/bases/cluster.x-k8s.io_clusters.yaml | 12 ++++++++++-- .../bases/cluster.x-k8s.io_machinehealthchecks.yaml | 6 +++++- 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/api/core/v1beta2/machinehealthcheck_types.go b/api/core/v1beta2/machinehealthcheck_types.go index 556c7fd862a3..29f7a5aaba3e 100644 --- a/api/core/v1beta2/machinehealthcheck_types.go +++ b/api/core/v1beta2/machinehealthcheck_types.go @@ -165,13 +165,13 @@ type UnhealthyMachineCondition struct { // type of Node condition // +kubebuilder:validation:Type=string // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:MaxLength=316 // +required Type string `json:"type"` // status of the condition, one of True, False, Unknown. - // +kubebuilder:validation:Type=string - // +kubebuilder:validation:MinLength=1 // +required + // +kubebuilder:validation:Enum=True;False;Unknown Status metav1.ConditionStatus `json:"status"` // timeout is the duration that a node must be in a given status for, diff --git a/config/crd/bases/cluster.x-k8s.io_clusterclasses.yaml b/config/crd/bases/cluster.x-k8s.io_clusterclasses.yaml index c98edb2fa8b9..583fc9c96fd8 100644 --- a/config/crd/bases/cluster.x-k8s.io_clusterclasses.yaml +++ b/config/crd/bases/cluster.x-k8s.io_clusterclasses.yaml @@ -2984,7 +2984,10 @@ spec: status: description: status of the condition, one of True, False, Unknown. - minLength: 1 + enum: + - "True" + - "False" + - Unknown type: string timeout: description: |- @@ -2995,6 +2998,7 @@ spec: type: string type: description: type of Node condition + maxLength: 316 minLength: 1 type: string required: @@ -4080,7 +4084,10 @@ spec: status: description: status of the condition, one of True, False, Unknown. 
- minLength: 1 + enum: + - "True" + - "False" + - Unknown type: string timeout: description: |- @@ -4091,6 +4098,7 @@ spec: type: string type: description: type of Node condition + maxLength: 316 minLength: 1 type: string required: diff --git a/config/crd/bases/cluster.x-k8s.io_clusters.yaml b/config/crd/bases/cluster.x-k8s.io_clusters.yaml index 4705cb6fd63d..ed69de9898ea 100644 --- a/config/crd/bases/cluster.x-k8s.io_clusters.yaml +++ b/config/crd/bases/cluster.x-k8s.io_clusters.yaml @@ -2524,7 +2524,10 @@ spec: status: description: status of the condition, one of True, False, Unknown. - minLength: 1 + enum: + - "True" + - "False" + - Unknown type: string timeout: description: |- @@ -2535,6 +2538,7 @@ spec: type: string type: description: type of Node condition + maxLength: 316 minLength: 1 type: string required: @@ -2895,7 +2899,10 @@ spec: status: description: status of the condition, one of True, False, Unknown. - minLength: 1 + enum: + - "True" + - "False" + - Unknown type: string timeout: description: |- @@ -2906,6 +2913,7 @@ spec: type: string type: description: type of Node condition + maxLength: 316 minLength: 1 type: string required: diff --git a/config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml b/config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml index 52bb9fa35597..0e592c6fbcbb 100644 --- a/config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml +++ b/config/crd/bases/cluster.x-k8s.io_machinehealthchecks.yaml @@ -1176,7 +1176,10 @@ spec: properties: status: description: status of the condition, one of True, False, Unknown. 
- minLength: 1 + enum: + - "True" + - "False" + - Unknown type: string timeout: description: |- @@ -1187,6 +1190,7 @@ spec: type: string type: description: type of Node condition + maxLength: 316 minLength: 1 type: string required: From 3d76d7a33d7a8053ba5ebf4017c91944ff86636c Mon Sep 17 00:00:00 2001 From: Justin Miron Date: Sun, 25 May 2025 12:56:05 -0500 Subject: [PATCH 4/4] fix docs and blueprint_test --- .../healthchecking.md | 12 ++++ exp/topology/scope/blueprint_test.go | 71 +++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/docs/book/src/tasks/automated-machine-management/healthchecking.md b/docs/book/src/tasks/automated-machine-management/healthchecking.md index 124acd65bf9b..7ab65f1fe930 100644 --- a/docs/book/src/tasks/automated-machine-management/healthchecking.md +++ b/docs/book/src/tasks/automated-machine-management/healthchecking.md @@ -58,6 +58,11 @@ spec: - type: Ready status: "False" timeout: 300s + # Conditions to check on Machines. If any condition is matched for the duration of its timeout, the Machine is considered unhealthy. + unhealthyMachineConditions: + - type: Ready + status: "False" + timeout: 300s ``` Use this example as the basis for defining a MachineHealthCheck for control plane nodes managed via @@ -81,6 +86,13 @@ spec: - type: Ready status: "False" timeout: 300s + unhealthyMachineConditions: + - type: Ready + status: "False" + timeout: 300s + - type: EtcdPodHealthy + status: "False" + timeout: 300s ```