Skip to content

Commit f3a9726

Browse files
authored
⚠️ Restructure MHC fields in MHC, Cluster and ClusterClass CRDs (#12504)
* Restructure MHC fields in MHC, Cluster and ClusterClass CRDs Signed-off-by: Stefan Büringer buringerst@vmware.com * Fix review findings * Fix review findings * Remove Machine from Cluster & ClusterClass health check structs * Add migration documentation * Improve godoc * Adjust godoc comments --------- Signed-off-by: Stefan Büringer buringerst@vmware.com
1 parent 9dcb871 commit f3a9726

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+3581
-1892
lines changed

.golangci-kal.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,10 @@ linters:
178178
text: "optionalfields: field (Workers|Metadata|ControlPlane|Strategy|Infrastructure|DeprecatedV1Beta1Metadata) is optional and should (be a pointer|have the omitempty tag)"
179179
linters:
180180
- kubeapilinter
181+
- path: "api/core/v1beta2/(cluster_types.go|clusterclass_types.go|machinehealthcheck_types.go)"
182+
text: "optionalfields: field (Checks|Remediation|TriggerIf) is optional and should (be a pointer|have the omitempty tag)"
183+
linters:
184+
- kubeapilinter
181185
- path: "api/core/v1beta2/machinedeployment_types.go"
182186
text: "optionalfields: field (Strategy|RollingUpdate|Remediation) is optional and should (be a pointer|have the omitempty tag)"
183187
linters:

api/core/v1beta1/conversion.go

Lines changed: 194 additions & 57 deletions
Large diffs are not rendered by default.

api/core/v1beta1/conversion_test.go

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,20 @@ func spokeCluster(in *Cluster, c randfill.Continue) {
194194
}
195195

196196
dropEmptyStringsCluster(in)
197+
if in.Spec.Topology != nil {
198+
if in.Spec.Topology.ControlPlane.MachineHealthCheck != nil {
199+
dropEmptyString(&in.Spec.Topology.ControlPlane.MachineHealthCheck.UnhealthyRange)
200+
}
201+
if in.Spec.Topology.Workers != nil {
202+
for i, md := range in.Spec.Topology.Workers.MachineDeployments {
203+
dropEmptyString(&md.FailureDomain)
204+
if md.MachineHealthCheck != nil {
205+
dropEmptyString(&md.MachineHealthCheck.UnhealthyRange)
206+
}
207+
in.Spec.Topology.Workers.MachineDeployments[i] = md
208+
}
209+
}
210+
}
197211

198212
if in.Spec.ClusterNetwork != nil {
199213
if in.Spec.ClusterNetwork.Services != nil && reflect.DeepEqual(in.Spec.ClusterNetwork.Services, &NetworkRanges{}) {
@@ -332,6 +346,15 @@ func spokeClusterClass(in *ClusterClass, c randfill.Continue) {
332346
in.Namespace = "foo"
333347

334348
dropEmptyStringsClusterClass(in)
349+
if in.Spec.ControlPlane.MachineHealthCheck != nil {
350+
dropEmptyString(&in.Spec.ControlPlane.MachineHealthCheck.UnhealthyRange)
351+
}
352+
for i, md := range in.Spec.Workers.MachineDeployments {
353+
if md.MachineHealthCheck != nil {
354+
dropEmptyString(&md.MachineHealthCheck.UnhealthyRange)
355+
}
356+
in.Spec.Workers.MachineDeployments[i] = md
357+
}
335358
}
336359

337360
func spokeClusterClassStatus(in *ClusterClassStatus, c randfill.Continue) {
@@ -653,7 +676,7 @@ func spokeMachineHealthCheck(in *MachineHealthCheck, c randfill.Continue) {
653676

654677
in.Namespace = "foo"
655678

656-
dropEmptyStringsMachineHealthCheck(in)
679+
dropEmptyString(&in.Spec.UnhealthyRange)
657680
}
658681

659682
func spokeMachineHealthCheckStatus(in *MachineHealthCheckStatus, c randfill.Continue) {

api/core/v1beta1/zz_generated.conversion.go

Lines changed: 13 additions & 184 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/core/v1beta2/cluster_types.go

Lines changed: 258 additions & 26 deletions
Large diffs are not rendered by default.

api/core/v1beta2/clusterclass_types.go

Lines changed: 204 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@ limitations under the License.
1717
package v1beta2
1818

1919
import (
20-
"reflect"
21-
2220
corev1 "k8s.io/api/core/v1"
2321
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
2422
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -169,11 +167,11 @@ type ControlPlaneClass struct {
169167
// +optional
170168
MachineInfrastructure *ControlPlaneClassMachineInfrastructureTemplate `json:"machineInfrastructure,omitempty"`
171169

172-
// machineHealthCheck defines a MachineHealthCheck for this ControlPlaneClass.
170+
// healthCheck defines a MachineHealthCheck for this ControlPlaneClass.
173171
// This field is supported if and only if the ControlPlane provider template
174172
// referenced above is Machine based and supports setting replicas.
175173
// +optional
176-
MachineHealthCheck *MachineHealthCheckClass `json:"machineHealthCheck,omitempty"`
174+
HealthCheck *ControlPlaneClassHealthCheck `json:"healthCheck,omitempty"`
177175

178176
// namingStrategy allows changing the naming pattern used when creating the control plane provider object.
179177
// +optional
@@ -200,6 +198,106 @@ type ControlPlaneClass struct {
200198
ReadinessGates []MachineReadinessGate `json:"readinessGates,omitempty"`
201199
}
202200

201+
// ControlPlaneClassHealthCheck defines a MachineHealthCheck for control plane machines.
202+
// +kubebuilder:validation:MinProperties=1
203+
type ControlPlaneClassHealthCheck struct {
204+
// checks are the checks that are used to evaluate if a Machine is healthy.
205+
//
206+
// Independent of this configuration, the MachineHealthCheck controller will always
207+
// flag Machines with `cluster.x-k8s.io/remediate-machine` annotation and
208+
// Machines with deleted Nodes as unhealthy.
209+
//
210+
// Furthermore, if checks.nodeStartupTimeoutSeconds is not set it
211+
// is defaulted to 10 minutes and evaluated accordingly.
212+
//
213+
// +optional
214+
Checks ControlPlaneClassHealthCheckChecks `json:"checks,omitempty,omitzero"`
215+
216+
// remediation configures if and how remediations are triggered if a Machine is unhealthy.
217+
//
218+
// If remediation or remediation.triggerIf is not set,
219+
// remediation will always be triggered for unhealthy Machines.
220+
//
221+
// If remediation or remediation.templateRef is not set,
222+
// the OwnerRemediated condition will be set on unhealthy Machines to trigger remediation via
223+
// the owner of the Machines, for example a MachineSet or a KubeadmControlPlane.
224+
//
225+
// +optional
226+
Remediation ControlPlaneClassHealthCheckRemediation `json:"remediation,omitempty,omitzero"`
227+
}
228+
229+
// ControlPlaneClassHealthCheckChecks are the checks that are used to evaluate if a control plane Machine is healthy.
230+
// +kubebuilder:validation:MinProperties=1
231+
type ControlPlaneClassHealthCheckChecks struct {
232+
// nodeStartupTimeoutSeconds allows setting the maximum time for MachineHealthCheck
233+
// to consider a Machine unhealthy if a corresponding Node isn't associated
234+
// through a `Spec.ProviderID` field.
235+
//
236+
// The duration set in this field is compared to the greatest of:
237+
// - Cluster's infrastructure ready condition timestamp (if and when available)
238+
// - Control Plane's initialized condition timestamp (if and when available)
239+
// - Machine's infrastructure ready condition timestamp (if and when available)
240+
// - Machine's metadata creation timestamp
241+
//
242+
// Defaults to 10 minutes.
243+
// If you wish to disable this feature, set the value explicitly to 0.
244+
// +optional
245+
// +kubebuilder:validation:Minimum=0
246+
NodeStartupTimeoutSeconds *int32 `json:"nodeStartupTimeoutSeconds,omitempty"`
247+
248+
// unhealthyNodeConditions contains a list of conditions that determine
249+
// whether a node is considered unhealthy. The conditions are combined in a
250+
// logical OR, i.e. if any of the conditions is met, the node is unhealthy.
251+
//
252+
// +optional
253+
// +listType=atomic
254+
// +kubebuilder:validation:MinItems=1
255+
// +kubebuilder:validation:MaxItems=100
256+
UnhealthyNodeConditions []UnhealthyNodeCondition `json:"unhealthyNodeConditions,omitempty"`
257+
}
258+
259+
// ControlPlaneClassHealthCheckRemediation configures if and how remediations are triggered if a control plane Machine is unhealthy.
260+
// +kubebuilder:validation:MinProperties=1
261+
type ControlPlaneClassHealthCheckRemediation struct {
262+
// triggerIf configures if remediations are triggered.
263+
// If this field is not set, remediations are always triggered.
264+
// +optional
265+
TriggerIf ControlPlaneClassHealthCheckRemediationTriggerIf `json:"triggerIf,omitempty,omitzero"`
266+
267+
// templateRef is a reference to a remediation template
268+
// provided by an infrastructure provider.
269+
//
270+
// This field is completely optional. When set, the MachineHealthCheck controller
271+
// creates a new object from the template referenced and hands off remediation of the machine to
272+
// a controller that lives outside of Cluster API.
273+
// +optional
274+
TemplateRef *MachineHealthCheckRemediationTemplateReference `json:"templateRef,omitempty"`
275+
}
276+
277+
// ControlPlaneClassHealthCheckRemediationTriggerIf configures if remediations are triggered.
278+
// +kubebuilder:validation:MinProperties=1
279+
type ControlPlaneClassHealthCheckRemediationTriggerIf struct {
280+
// unhealthyLessThanOrEqualTo specifies that remediations are only triggered if the number of
281+
// unhealthy Machines is less than or equal to the configured value.
282+
// unhealthyInRange takes precedence if set.
283+
//
284+
// +optional
285+
UnhealthyLessThanOrEqualTo *intstr.IntOrString `json:"unhealthyLessThanOrEqualTo,omitempty"`
286+
287+
// unhealthyInRange specifies that remediations are only triggered if the number of
288+
// unhealthy Machines is in the configured range.
289+
// Takes precedence over unhealthyLessThanOrEqualTo.
290+
// E.g. "[3-5]" - This means that remediation will be allowed only when:
291+
// (a) there are at least 3 unhealthy Machines (and)
292+
// (b) there are at most 5 unhealthy Machines
293+
//
294+
// +optional
295+
// +kubebuilder:validation:Pattern=^\[[0-9]+-[0-9]+\]$
296+
// +kubebuilder:validation:MinLength=1
297+
// +kubebuilder:validation:MaxLength=32
298+
UnhealthyInRange string `json:"unhealthyInRange,omitempty"`
299+
}
300+
203301
// ControlPlaneClassMachineDeletionSpec contains configuration options for Machine deletion.
204302
// +kubebuilder:validation:MinProperties=1
205303
type ControlPlaneClassMachineDeletionSpec struct {
@@ -303,9 +401,9 @@ type MachineDeploymentClass struct {
303401
// +required
304402
Infrastructure MachineDeploymentClassInfrastructureTemplate `json:"infrastructure"`
305403

306-
// machineHealthCheck defines a MachineHealthCheck for this MachineDeploymentClass.
404+
// healthCheck defines a MachineHealthCheck for this MachineDeploymentClass.
307405
// +optional
308-
MachineHealthCheck *MachineHealthCheckClass `json:"machineHealthCheck,omitempty"`
406+
HealthCheck *MachineDeploymentClassHealthCheck `json:"healthCheck,omitempty"`
309407

310408
// failureDomain is the failure domain the machines will be created in.
311409
// Must match the name of a FailureDomain from the Cluster status.
@@ -353,6 +451,106 @@ type MachineDeploymentClass struct {
353451
Strategy MachineDeploymentStrategy `json:"strategy,omitempty,omitzero"`
354452
}
355453

454+
// MachineDeploymentClassHealthCheck defines a MachineHealthCheck for MachineDeployment machines.
455+
// +kubebuilder:validation:MinProperties=1
456+
type MachineDeploymentClassHealthCheck struct {
457+
// checks are the checks that are used to evaluate if a Machine is healthy.
458+
//
459+
// Independent of this configuration, the MachineHealthCheck controller will always
460+
// flag Machines with `cluster.x-k8s.io/remediate-machine` annotation and
461+
// Machines with deleted Nodes as unhealthy.
462+
//
463+
// Furthermore, if checks.nodeStartupTimeoutSeconds is not set it
464+
// is defaulted to 10 minutes and evaluated accordingly.
465+
//
466+
// +optional
467+
Checks MachineDeploymentClassHealthCheckChecks `json:"checks,omitempty,omitzero"`
468+
469+
// remediation configures if and how remediations are triggered if a Machine is unhealthy.
470+
//
471+
// If remediation or remediation.triggerIf is not set,
472+
// remediation will always be triggered for unhealthy Machines.
473+
//
474+
// If remediation or remediation.templateRef is not set,
475+
// the OwnerRemediated condition will be set on unhealthy Machines to trigger remediation via
476+
// the owner of the Machines, for example a MachineSet or a KubeadmControlPlane.
477+
//
478+
// +optional
479+
Remediation MachineDeploymentClassHealthCheckRemediation `json:"remediation,omitempty,omitzero"`
480+
}
481+
482+
// MachineDeploymentClassHealthCheckChecks are the checks that are used to evaluate if a MachineDeployment Machine is healthy.
483+
// +kubebuilder:validation:MinProperties=1
484+
type MachineDeploymentClassHealthCheckChecks struct {
485+
// nodeStartupTimeoutSeconds allows setting the maximum time for MachineHealthCheck
486+
// to consider a Machine unhealthy if a corresponding Node isn't associated
487+
// through a `Spec.ProviderID` field.
488+
//
489+
// The duration set in this field is compared to the greatest of:
490+
// - Cluster's infrastructure ready condition timestamp (if and when available)
491+
// - Control Plane's initialized condition timestamp (if and when available)
492+
// - Machine's infrastructure ready condition timestamp (if and when available)
493+
// - Machine's metadata creation timestamp
494+
//
495+
// Defaults to 10 minutes.
496+
// If you wish to disable this feature, set the value explicitly to 0.
497+
// +optional
498+
// +kubebuilder:validation:Minimum=0
499+
NodeStartupTimeoutSeconds *int32 `json:"nodeStartupTimeoutSeconds,omitempty"`
500+
501+
// unhealthyNodeConditions contains a list of conditions that determine
502+
// whether a node is considered unhealthy. The conditions are combined in a
503+
// logical OR, i.e. if any of the conditions is met, the node is unhealthy.
504+
//
505+
// +optional
506+
// +listType=atomic
507+
// +kubebuilder:validation:MinItems=1
508+
// +kubebuilder:validation:MaxItems=100
509+
UnhealthyNodeConditions []UnhealthyNodeCondition `json:"unhealthyNodeConditions,omitempty"`
510+
}
511+
512+
// MachineDeploymentClassHealthCheckRemediation configures if and how remediations are triggered if a MachineDeployment Machine is unhealthy.
513+
// +kubebuilder:validation:MinProperties=1
514+
type MachineDeploymentClassHealthCheckRemediation struct {
515+
// triggerIf configures if remediations are triggered.
516+
// If this field is not set, remediations are always triggered.
517+
// +optional
518+
TriggerIf MachineDeploymentClassHealthCheckRemediationTriggerIf `json:"triggerIf,omitempty,omitzero"`
519+
520+
// templateRef is a reference to a remediation template
521+
// provided by an infrastructure provider.
522+
//
523+
// This field is completely optional. When set, the MachineHealthCheck controller
524+
// creates a new object from the template referenced and hands off remediation of the machine to
525+
// a controller that lives outside of Cluster API.
526+
// +optional
527+
TemplateRef *MachineHealthCheckRemediationTemplateReference `json:"templateRef,omitempty"`
528+
}
529+
530+
// MachineDeploymentClassHealthCheckRemediationTriggerIf configures if remediations are triggered.
531+
// +kubebuilder:validation:MinProperties=1
532+
type MachineDeploymentClassHealthCheckRemediationTriggerIf struct {
533+
// unhealthyLessThanOrEqualTo specifies that remediations are only triggered if the number of
534+
// unhealthy Machines is less than or equal to the configured value.
535+
// unhealthyInRange takes precedence if set.
536+
//
537+
// +optional
538+
UnhealthyLessThanOrEqualTo *intstr.IntOrString `json:"unhealthyLessThanOrEqualTo,omitempty"`
539+
540+
// unhealthyInRange specifies that remediations are only triggered if the number of
541+
// unhealthy Machines is in the configured range.
542+
// Takes precedence over unhealthyLessThanOrEqualTo.
543+
// E.g. "[3-5]" - This means that remediation will be allowed only when:
544+
// (a) there are at least 3 unhealthy Machines (and)
545+
// (b) there are at most 5 unhealthy Machines
546+
//
547+
// +optional
548+
// +kubebuilder:validation:Pattern=^\[[0-9]+-[0-9]+\]$
549+
// +kubebuilder:validation:MinLength=1
550+
// +kubebuilder:validation:MaxLength=32
551+
UnhealthyInRange string `json:"unhealthyInRange,omitempty"`
552+
}
553+
356554
// MachineDeploymentClassMachineDeletionSpec contains configuration options for Machine deletion.
357555
// +kubebuilder:validation:MinProperties=1
358556
type MachineDeploymentClassMachineDeletionSpec struct {
@@ -396,63 +594,6 @@ type MachineDeploymentClassNamingStrategy struct {
396594
Template string `json:"template,omitempty"`
397595
}
398596

399-
// MachineHealthCheckClass defines a MachineHealthCheck for a group of Machines.
400-
// +kubebuilder:validation:MinProperties=1
401-
type MachineHealthCheckClass struct {
402-
// unhealthyNodeConditions contains a list of conditions that determine
403-
// whether a node is considered unhealthy. The conditions are combined in a
404-
// logical OR, i.e. if any of the conditions is met, the node is unhealthy.
405-
//
406-
// +optional
407-
// +listType=atomic
408-
// +kubebuilder:validation:MinItems=1
409-
// +kubebuilder:validation:MaxItems=100
410-
UnhealthyNodeConditions []UnhealthyNodeCondition `json:"unhealthyNodeConditions,omitempty"`
411-
412-
// maxUnhealthy specifies the maximum number of unhealthy machines allowed.
413-
// Any further remediation is only allowed if at most "maxUnhealthy" machines selected by
414-
// "selector" are not healthy.
415-
// +optional
416-
MaxUnhealthy *intstr.IntOrString `json:"maxUnhealthy,omitempty"`
417-
418-
// unhealthyRange specifies the range of unhealthy machines allowed.
419-
// Any further remediation is only allowed if the number of machines selected by "selector" as not healthy
420-
// is within the range of "unhealthyRange". Takes precedence over maxUnhealthy.
421-
// Eg. "[3-5]" - This means that remediation will be allowed only when:
422-
// (a) there are at least 3 unhealthy machines (and)
423-
// (b) there are at most 5 unhealthy machines
424-
// +optional
425-
// +kubebuilder:validation:Pattern=^\[[0-9]+-[0-9]+\]$
426-
// +kubebuilder:validation:MinLength=1
427-
// +kubebuilder:validation:MaxLength=32
428-
UnhealthyRange string `json:"unhealthyRange,omitempty"`
429-
430-
// nodeStartupTimeoutSeconds allows to set the maximum time for MachineHealthCheck
431-
// to consider a Machine unhealthy if a corresponding Node isn't associated
432-
// through a `Spec.ProviderID` field.
433-
//
434-
// The duration set in this field is compared to the greatest of:
435-
// - Cluster's infrastructure ready condition timestamp (if and when available)
436-
// - Control Plane's initialized condition timestamp (if and when available)
437-
// - Machine's infrastructure ready condition timestamp (if and when available)
438-
// - Machine's metadata creation timestamp
439-
//
440-
// Defaults to 10 minutes.
441-
// If you wish to disable this feature, set the value explicitly to 0.
442-
// +optional
443-
// +kubebuilder:validation:Minimum=0
444-
NodeStartupTimeoutSeconds *int32 `json:"nodeStartupTimeoutSeconds,omitempty"`
445-
446-
// remediationTemplate is a reference to a remediation template
447-
// provided by an infrastructure provider.
448-
//
449-
// This field is completely optional, when filled, the MachineHealthCheck controller
450-
// creates a new object from the template referenced and hands off remediation of the machine to
451-
// a controller that lives outside of Cluster API.
452-
// +optional
453-
RemediationTemplate *MachineHealthCheckRemediationTemplateReference `json:"remediationTemplate,omitempty"`
454-
}
455-
456597
// MachinePoolClass serves as a template to define a pool of worker nodes of the cluster
457598
// provisioned using `ClusterClass`.
458599
type MachinePoolClass struct {
@@ -550,11 +691,6 @@ type MachinePoolClassNamingStrategy struct {
550691
Template string `json:"template,omitempty"`
551692
}
552693

553-
// IsZero returns true if none of the values of MachineHealthCheckClass are defined.
554-
func (m MachineHealthCheckClass) IsZero() bool {
555-
return reflect.ValueOf(m).IsZero()
556-
}
557-
558694
// ClusterClassVariable defines a variable which can
559695
// be configured in the Cluster topology and used in patches.
560696
type ClusterClassVariable struct {

0 commit comments

Comments
 (0)