Skip to content

⚠️ Restructure MHC fields in MHC, Cluster and ClusterClass CRDs #12504

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jul 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .golangci-kal.yml
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,10 @@ linters:
text: "optionalfields: field (Workers|Metadata|ControlPlane|Strategy|Infrastructure|DeprecatedV1Beta1Metadata) is optional and should (be a pointer|have the omitempty tag)"
linters:
- kubeapilinter
- path: "api/core/v1beta2/(cluster_types.go|clusterclass_types.go|machinehealthcheck_types.go)"
text: "optionalfields: field (Checks|Remediation|TriggerIf) is optional and should (be a pointer|have the omitempty tag)"
linters:
- kubeapilinter
- path: "api/core/v1beta2/machinedeployment_types.go"
text: "optionalfields: field (Strategy|RollingUpdate|Remediation) is optional and should (be a pointer|have the omitempty tag)"
linters:
Expand Down
251 changes: 194 additions & 57 deletions api/core/v1beta1/conversion.go

Large diffs are not rendered by default.

25 changes: 24 additions & 1 deletion api/core/v1beta1/conversion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,20 @@ func spokeCluster(in *Cluster, c randfill.Continue) {
}

dropEmptyStringsCluster(in)
if in.Spec.Topology != nil {
if in.Spec.Topology.ControlPlane.MachineHealthCheck != nil {
dropEmptyString(&in.Spec.Topology.ControlPlane.MachineHealthCheck.UnhealthyRange)
}
if in.Spec.Topology.Workers != nil {
for i, md := range in.Spec.Topology.Workers.MachineDeployments {
dropEmptyString(&md.FailureDomain)
if md.MachineHealthCheck != nil {
dropEmptyString(&md.MachineHealthCheck.UnhealthyRange)
}
in.Spec.Topology.Workers.MachineDeployments[i] = md
}
}
}

if in.Spec.ClusterNetwork != nil {
if in.Spec.ClusterNetwork.Services != nil && reflect.DeepEqual(in.Spec.ClusterNetwork.Services, &NetworkRanges{}) {
Expand Down Expand Up @@ -332,6 +346,15 @@ func spokeClusterClass(in *ClusterClass, c randfill.Continue) {
in.Namespace = "foo"

dropEmptyStringsClusterClass(in)
if in.Spec.ControlPlane.MachineHealthCheck != nil {
dropEmptyString(&in.Spec.ControlPlane.MachineHealthCheck.UnhealthyRange)
}
for i, md := range in.Spec.Workers.MachineDeployments {
if md.MachineHealthCheck != nil {
dropEmptyString(&md.MachineHealthCheck.UnhealthyRange)
}
in.Spec.Workers.MachineDeployments[i] = md
}
}

func spokeClusterClassStatus(in *ClusterClassStatus, c randfill.Continue) {
Expand Down Expand Up @@ -653,7 +676,7 @@ func spokeMachineHealthCheck(in *MachineHealthCheck, c randfill.Continue) {

in.Namespace = "foo"

dropEmptyStringsMachineHealthCheck(in)
dropEmptyString(&in.Spec.UnhealthyRange)
}

func spokeMachineHealthCheckStatus(in *MachineHealthCheckStatus, c randfill.Continue) {
Expand Down
197 changes: 13 additions & 184 deletions api/core/v1beta1/zz_generated.conversion.go

Large diffs are not rendered by default.

284 changes: 258 additions & 26 deletions api/core/v1beta2/cluster_types.go

Large diffs are not rendered by default.

272 changes: 204 additions & 68 deletions api/core/v1beta2/clusterclass_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ limitations under the License.
package v1beta2

import (
"reflect"

corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -169,11 +167,11 @@ type ControlPlaneClass struct {
// +optional
MachineInfrastructure *ControlPlaneClassMachineInfrastructureTemplate `json:"machineInfrastructure,omitempty"`

// machineHealthCheck defines a MachineHealthCheck for this ControlPlaneClass.
// healthCheck defines a MachineHealthCheck for this ControlPlaneClass.
// This field is supported if and only if the ControlPlane provider template
// referenced above is Machine based and supports setting replicas.
// +optional
MachineHealthCheck *MachineHealthCheckClass `json:"machineHealthCheck,omitempty"`
HealthCheck *ControlPlaneClassHealthCheck `json:"healthCheck,omitempty"`

// namingStrategy allows changing the naming pattern used when creating the control plane provider object.
// +optional
Expand All @@ -200,6 +198,106 @@ type ControlPlaneClass struct {
ReadinessGates []MachineReadinessGate `json:"readinessGates,omitempty"`
}

// ControlPlaneClassHealthCheck defines a MachineHealthCheck for control plane machines.
// +kubebuilder:validation:MinProperties=1
type ControlPlaneClassHealthCheck struct {
// checks are the checks that are used to evaluate if a Machine is healthy.
//
// Independent of this configuration, the MachineHealthCheck controller will always
// flag Machines with the `cluster.x-k8s.io/remediate-machine` annotation and
// Machines with deleted Nodes as unhealthy.
//
// Furthermore, if checks.nodeStartupTimeoutSeconds is not set, it
// is defaulted to 10 minutes and evaluated accordingly.
//
// +optional
Checks ControlPlaneClassHealthCheckChecks `json:"checks,omitempty,omitzero"`

// remediation configures if and how remediations are triggered if a Machine is unhealthy.
//
// If remediation or remediation.triggerIf is not set,
// remediation will always be triggered for unhealthy Machines.
//
// If remediation or remediation.templateRef is not set,
// the OwnerRemediated condition will be set on unhealthy Machines to trigger remediation via
// the owner of the Machines, for example a MachineSet or a KubeadmControlPlane.
//
// +optional
Remediation ControlPlaneClassHealthCheckRemediation `json:"remediation,omitempty,omitzero"`
}

// ControlPlaneClassHealthCheckChecks are the checks that are used to evaluate if a control plane Machine is healthy.
// +kubebuilder:validation:MinProperties=1
type ControlPlaneClassHealthCheckChecks struct {
// nodeStartupTimeoutSeconds allows setting the maximum time for MachineHealthCheck
// to consider a Machine unhealthy if a corresponding Node isn't associated
// through a `Spec.ProviderID` field.
//
// The duration set in this field is compared to the greatest of:
// - Cluster's infrastructure ready condition timestamp (if and when available)
// - Control Plane's initialized condition timestamp (if and when available)
// - Machine's infrastructure ready condition timestamp (if and when available)
// - Machine's metadata creation timestamp
//
// Defaults to 10 minutes.
// If you wish to disable this feature, set the value explicitly to 0.
// +optional
// +kubebuilder:validation:Minimum=0
NodeStartupTimeoutSeconds *int32 `json:"nodeStartupTimeoutSeconds,omitempty"`

// unhealthyNodeConditions contains a list of conditions that determine
// whether a node is considered unhealthy. The conditions are combined in a
// logical OR, i.e. if any of the conditions is met, the node is unhealthy.
//
// +optional
// +listType=atomic
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100
UnhealthyNodeConditions []UnhealthyNodeCondition `json:"unhealthyNodeConditions,omitempty"`
}

// ControlPlaneClassHealthCheckRemediation configures if and how remediations are triggered if a control plane Machine is unhealthy.
// +kubebuilder:validation:MinProperties=1
type ControlPlaneClassHealthCheckRemediation struct {
// triggerIf configures if remediations are triggered.
// If this field is not set, remediations are always triggered.
// +optional
TriggerIf ControlPlaneClassHealthCheckRemediationTriggerIf `json:"triggerIf,omitempty,omitzero"`

// templateRef is a reference to a remediation template
// provided by an infrastructure provider.
//
// This field is completely optional. When set, the MachineHealthCheck controller
// creates a new object from the referenced template and hands off remediation of the machine to
// a controller that lives outside of Cluster API.
// +optional
TemplateRef *MachineHealthCheckRemediationTemplateReference `json:"templateRef,omitempty"`
}

// ControlPlaneClassHealthCheckRemediationTriggerIf configures if remediations are triggered.
// +kubebuilder:validation:MinProperties=1
type ControlPlaneClassHealthCheckRemediationTriggerIf struct {
// unhealthyLessThanOrEqualTo specifies that remediations are only triggered if the number of
// unhealthy Machines is less than or equal to the configured value.
// unhealthyInRange takes precedence if set.
//
// +optional
UnhealthyLessThanOrEqualTo *intstr.IntOrString `json:"unhealthyLessThanOrEqualTo,omitempty"`

// unhealthyInRange specifies that remediations are only triggered if the number of
// unhealthy Machines is in the configured range.
// Takes precedence over unhealthyLessThanOrEqualTo.
// E.g. "[3-5]" means that remediation will be allowed only when:
// (a) there are at least 3 unhealthy Machines (and)
// (b) there are at most 5 unhealthy Machines
//
// +optional
// +kubebuilder:validation:Pattern=^\[[0-9]+-[0-9]+\]$
// +kubebuilder:validation:MinLength=1
// +kubebuilder:validation:MaxLength=32
UnhealthyInRange string `json:"unhealthyInRange,omitempty"`
}

// ControlPlaneClassMachineDeletionSpec contains configuration options for Machine deletion.
// +kubebuilder:validation:MinProperties=1
type ControlPlaneClassMachineDeletionSpec struct {
Expand Down Expand Up @@ -303,9 +401,9 @@ type MachineDeploymentClass struct {
// +required
Infrastructure MachineDeploymentClassInfrastructureTemplate `json:"infrastructure"`

// machineHealthCheck defines a MachineHealthCheck for this MachineDeploymentClass.
// healthCheck defines a MachineHealthCheck for this MachineDeploymentClass.
// +optional
MachineHealthCheck *MachineHealthCheckClass `json:"machineHealthCheck,omitempty"`
HealthCheck *MachineDeploymentClassHealthCheck `json:"healthCheck,omitempty"`

// failureDomain is the failure domain the machines will be created in.
// Must match the name of a FailureDomain from the Cluster status.
Expand Down Expand Up @@ -353,6 +451,106 @@ type MachineDeploymentClass struct {
Strategy MachineDeploymentStrategy `json:"strategy,omitempty,omitzero"`
}

// MachineDeploymentClassHealthCheck defines a MachineHealthCheck for MachineDeployment machines.
// +kubebuilder:validation:MinProperties=1
type MachineDeploymentClassHealthCheck struct {
// checks are the checks that are used to evaluate if a Machine is healthy.
//
// Independent of this configuration, the MachineHealthCheck controller will always
// flag Machines with the `cluster.x-k8s.io/remediate-machine` annotation and
// Machines with deleted Nodes as unhealthy.
//
// Furthermore, if checks.nodeStartupTimeoutSeconds is not set, it
// is defaulted to 10 minutes and evaluated accordingly.
//
// +optional
Checks MachineDeploymentClassHealthCheckChecks `json:"checks,omitempty,omitzero"`

// remediation configures if and how remediations are triggered if a Machine is unhealthy.
//
// If remediation or remediation.triggerIf is not set,
// remediation will always be triggered for unhealthy Machines.
//
// If remediation or remediation.templateRef is not set,
// the OwnerRemediated condition will be set on unhealthy Machines to trigger remediation via
// the owner of the Machines, for example a MachineSet or a KubeadmControlPlane.
//
// +optional
Remediation MachineDeploymentClassHealthCheckRemediation `json:"remediation,omitempty,omitzero"`
}

// MachineDeploymentClassHealthCheckChecks are the checks that are used to evaluate if a MachineDeployment Machine is healthy.
// +kubebuilder:validation:MinProperties=1
type MachineDeploymentClassHealthCheckChecks struct {
// nodeStartupTimeoutSeconds allows setting the maximum time for MachineHealthCheck
// to consider a Machine unhealthy if a corresponding Node isn't associated
// through a `Spec.ProviderID` field.
//
// The duration set in this field is compared to the greatest of:
// - Cluster's infrastructure ready condition timestamp (if and when available)
// - Control Plane's initialized condition timestamp (if and when available)
// - Machine's infrastructure ready condition timestamp (if and when available)
// - Machine's metadata creation timestamp
//
// Defaults to 10 minutes.
// If you wish to disable this feature, set the value explicitly to 0.
// +optional
// +kubebuilder:validation:Minimum=0
NodeStartupTimeoutSeconds *int32 `json:"nodeStartupTimeoutSeconds,omitempty"`

// unhealthyNodeConditions contains a list of conditions that determine
// whether a node is considered unhealthy. The conditions are combined in a
// logical OR, i.e. if any of the conditions is met, the node is unhealthy.
//
// +optional
// +listType=atomic
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100
UnhealthyNodeConditions []UnhealthyNodeCondition `json:"unhealthyNodeConditions,omitempty"`
}

// MachineDeploymentClassHealthCheckRemediation configures if and how remediations are triggered if a MachineDeployment Machine is unhealthy.
// +kubebuilder:validation:MinProperties=1
type MachineDeploymentClassHealthCheckRemediation struct {
// triggerIf configures if remediations are triggered.
// If this field is not set, remediations are always triggered.
// +optional
TriggerIf MachineDeploymentClassHealthCheckRemediationTriggerIf `json:"triggerIf,omitempty,omitzero"`

// templateRef is a reference to a remediation template
// provided by an infrastructure provider.
//
// This field is completely optional. When set, the MachineHealthCheck controller
// creates a new object from the referenced template and hands off remediation of the machine to
// a controller that lives outside of Cluster API.
// +optional
TemplateRef *MachineHealthCheckRemediationTemplateReference `json:"templateRef,omitempty"`
}

// MachineDeploymentClassHealthCheckRemediationTriggerIf configures if remediations are triggered.
// +kubebuilder:validation:MinProperties=1
type MachineDeploymentClassHealthCheckRemediationTriggerIf struct {
// unhealthyLessThanOrEqualTo specifies that remediations are only triggered if the number of
// unhealthy Machines is less than or equal to the configured value.
// unhealthyInRange takes precedence if set.
//
// +optional
UnhealthyLessThanOrEqualTo *intstr.IntOrString `json:"unhealthyLessThanOrEqualTo,omitempty"`

// unhealthyInRange specifies that remediations are only triggered if the number of
// unhealthy Machines is in the configured range.
// Takes precedence over unhealthyLessThanOrEqualTo.
// E.g. "[3-5]" means that remediation will be allowed only when:
// (a) there are at least 3 unhealthy Machines (and)
// (b) there are at most 5 unhealthy Machines
//
// +optional
// +kubebuilder:validation:Pattern=^\[[0-9]+-[0-9]+\]$
// +kubebuilder:validation:MinLength=1
// +kubebuilder:validation:MaxLength=32
UnhealthyInRange string `json:"unhealthyInRange,omitempty"`
}

// MachineDeploymentClassMachineDeletionSpec contains configuration options for Machine deletion.
// +kubebuilder:validation:MinProperties=1
type MachineDeploymentClassMachineDeletionSpec struct {
Expand Down Expand Up @@ -396,63 +594,6 @@ type MachineDeploymentClassNamingStrategy struct {
Template string `json:"template,omitempty"`
}

// MachineHealthCheckClass defines a MachineHealthCheck for a group of Machines.
// +kubebuilder:validation:MinProperties=1
type MachineHealthCheckClass struct {
// unhealthyNodeConditions contains a list of conditions that determine
// whether a node is considered unhealthy. The conditions are combined in a
// logical OR, i.e. if any of the conditions is met, the node is unhealthy.
//
// +optional
// +listType=atomic
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100
UnhealthyNodeConditions []UnhealthyNodeCondition `json:"unhealthyNodeConditions,omitempty"`

// maxUnhealthy specifies the maximum number of unhealthy machines allowed.
// Any further remediation is only allowed if at most "maxUnhealthy" machines selected by
// "selector" are not healthy.
// +optional
MaxUnhealthy *intstr.IntOrString `json:"maxUnhealthy,omitempty"`

// unhealthyRange specifies the range of unhealthy machines allowed.
// Any further remediation is only allowed if the number of machines selected by "selector" as not healthy
// is within the range of "unhealthyRange". Takes precedence over maxUnhealthy.
// E.g. "[3-5]" means that remediation will be allowed only when:
// (a) there are at least 3 unhealthy machines (and)
// (b) there are at most 5 unhealthy machines
// +optional
// +kubebuilder:validation:Pattern=^\[[0-9]+-[0-9]+\]$
// +kubebuilder:validation:MinLength=1
// +kubebuilder:validation:MaxLength=32
UnhealthyRange string `json:"unhealthyRange,omitempty"`

// nodeStartupTimeoutSeconds allows setting the maximum time for MachineHealthCheck
// to consider a Machine unhealthy if a corresponding Node isn't associated
// through a `Spec.ProviderID` field.
//
// The duration set in this field is compared to the greatest of:
// - Cluster's infrastructure ready condition timestamp (if and when available)
// - Control Plane's initialized condition timestamp (if and when available)
// - Machine's infrastructure ready condition timestamp (if and when available)
// - Machine's metadata creation timestamp
//
// Defaults to 10 minutes.
// If you wish to disable this feature, set the value explicitly to 0.
// +optional
// +kubebuilder:validation:Minimum=0
NodeStartupTimeoutSeconds *int32 `json:"nodeStartupTimeoutSeconds,omitempty"`

// remediationTemplate is a reference to a remediation template
// provided by an infrastructure provider.
//
// This field is completely optional. When set, the MachineHealthCheck controller
// creates a new object from the referenced template and hands off remediation of the machine to
// a controller that lives outside of Cluster API.
// +optional
RemediationTemplate *MachineHealthCheckRemediationTemplateReference `json:"remediationTemplate,omitempty"`
}

// MachinePoolClass serves as a template to define a pool of worker nodes of the cluster
// provisioned using `ClusterClass`.
type MachinePoolClass struct {
Expand Down Expand Up @@ -550,11 +691,6 @@ type MachinePoolClassNamingStrategy struct {
Template string `json:"template,omitempty"`
}

// IsZero reports whether the MachineHealthCheckClass is the zero value for its
// type, i.e. none of its fields are set.
func (m MachineHealthCheckClass) IsZero() bool {
	// Reflection is used so every field is covered without listing them by name.
	value := reflect.ValueOf(m)
	return value.IsZero()
}

// ClusterClassVariable defines a variable which can
// be configured in the Cluster topology and used in patches.
type ClusterClassVariable struct {
Expand Down
Loading