Skip to content

Commit c4d1834

Browse files
Fix TestKubeletConfigRemediation: wait for MachineConfigPool updates during teardown, un-apply the kubelet remediation after the test, and wait for ComplianceSuite deletion so later tests start from a clean cluster
1 parent 15f8f95 commit c4d1834

File tree

3 files changed

+124
-25
lines changed

3 files changed

+124
-25
lines changed

tests/e2e/framework/common.go

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -949,6 +949,90 @@ func (f *Framework) restoreNodeLabelsForPool(n string) error {
949949
return nil
950950
}
951951

952+
// WaitForMachineConfigPoolUpdated waits for a MachineConfigPool to complete an update cycle.
953+
// It first waits for the Updated condition to become False (indicating update/reboot started),
954+
// then waits for it to become True again (indicating update/reboot completed).
955+
func (f *Framework) WaitForMachineConfigPoolUpdated(poolName string) error {
956+
if f.Platform == "rosa" {
957+
log.Printf("Bypassing MachineConfigPool update check on %s", f.Platform)
958+
return nil
959+
}
960+
961+
// First, get the initial state
962+
pool := mcfgv1.MachineConfigPool{}
963+
err := f.Client.Get(context.TODO(), types.NamespacedName{Name: poolName}, &pool)
964+
if err != nil {
965+
return fmt.Errorf("failed to find Machine Config Pool %s: %w", poolName, err)
966+
}
967+
968+
// Check initial state of MachineConfigPoolUpdated condition
969+
initialUpdated := false
970+
for _, c := range pool.Status.Conditions {
971+
if c.Type == mcfgv1.MachineConfigPoolUpdated {
972+
initialUpdated = (c.Status == core.ConditionTrue)
973+
break
974+
}
975+
}
976+
977+
// If the pool is already updated (True), wait for it to start updating (False)
978+
if initialUpdated {
979+
log.Printf("Machine Config Pool %s is currently updated. Waiting for update to start...\n", poolName)
980+
err = wait.PollImmediate(machineOperationRetryInterval, machineOperationTimeout, func() (bool, error) {
981+
pool := mcfgv1.MachineConfigPool{}
982+
err := f.Client.Get(context.TODO(), types.NamespacedName{Name: poolName}, &pool)
983+
if err != nil {
984+
log.Printf("failed to find Machine Config Pool %s\n", poolName)
985+
return false, err
986+
}
987+
988+
for _, c := range pool.Status.Conditions {
989+
if c.Type == mcfgv1.MachineConfigPoolUpdated {
990+
if c.Status == core.ConditionFalse {
991+
log.Printf("Machine Config Pool %s update started (Updated=False)\n", poolName)
992+
return true, nil
993+
}
994+
break
995+
}
996+
}
997+
998+
log.Printf("Machine Config Pool %s still updated, waiting for update to start...\n", poolName)
999+
return false, nil
1000+
})
1001+
if err != nil {
1002+
return fmt.Errorf("failed waiting for Machine Config Pool %s to start updating: %w", poolName, err)
1003+
}
1004+
} else {
1005+
log.Printf("Machine Config Pool %s is already updating (Updated=False). Waiting for update to complete...\n", poolName)
1006+
}
1007+
1008+
// Now wait for the update to complete (Updated=True)
1009+
err = wait.PollImmediate(machineOperationRetryInterval, machineOperationTimeout, func() (bool, error) {
1010+
pool := mcfgv1.MachineConfigPool{}
1011+
err := f.Client.Get(context.TODO(), types.NamespacedName{Name: poolName}, &pool)
1012+
if err != nil {
1013+
log.Printf("failed to find Machine Config Pool %s\n", poolName)
1014+
return false, err
1015+
}
1016+
1017+
for _, c := range pool.Status.Conditions {
1018+
if c.Type == mcfgv1.MachineConfigPoolUpdated {
1019+
if c.Status == core.ConditionTrue {
1020+
return true, nil
1021+
}
1022+
log.Printf("Machine Config Pool %s has not finished updating yet (Updated=%s)... retrying\n", poolName, c.Status)
1023+
return false, nil
1024+
}
1025+
}
1026+
return false, nil
1027+
})
1028+
if err != nil {
1029+
return fmt.Errorf("failed waiting for Machine Config Pool %s to be updated: %w", poolName, err)
1030+
}
1031+
1032+
log.Printf("Machine Config Pool %s is updated\n", poolName)
1033+
return nil
1034+
}
1035+
9521036
func (f *Framework) getNodesForPool(p *mcfgv1.MachineConfigPool) (core.NodeList, error) {
9531037
var nodeList core.NodeList
9541038
opts := &dynclient.ListOptions{

tests/e2e/framework/main_entry.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,11 @@ func (f *Framework) TearDown() error {
168168
if err != nil {
169169
return err
170170
}
171+
// Wait for worker pool to be updated after nodes are unlabeled
172+
err = f.WaitForMachineConfigPoolUpdated(workerPoolName)
173+
if err != nil {
174+
return err
175+
}
171176
err = f.cleanUpMachineConfigPool("e2e")
172177
if err != nil {
173178
return err

tests/e2e/serial/main_test.go

Lines changed: 35 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1632,49 +1632,34 @@ func TestVariableTemplate(t *testing.T) {
16321632

16331633
func TestKubeletConfigRemediation(t *testing.T) {
16341634
f := framework.Global
1635-
var baselineImage = fmt.Sprintf("%s:%s", brokenContentImagePath, "new_kubeletconfig")
1636-
const requiredRule = "kubelet-enable-streaming-connections"
1637-
pbName := framework.GetObjNameFromTest(t)
1638-
prefixName := func(profName, ruleBaseName string) string { return profName + "-" + ruleBaseName }
1639-
1640-
ocpPb, err := f.CreateProfileBundle(pbName, baselineImage, framework.OcpContentFile)
1641-
if err != nil {
1642-
t.Fatal(err)
1643-
}
1644-
defer f.Client.Delete(context.TODO(), ocpPb)
1645-
if err := f.WaitForProfileBundleStatus(pbName, compv1alpha1.DataStreamValid); err != nil {
1646-
t.Fatal(err)
1647-
}
1648-
1649-
// Check that if the rule we are going to test is there
1650-
requiredRuleName := prefixName(pbName, requiredRule)
1651-
requiredVersionRuleName := prefixName(pbName, "version-detect-in-ocp")
1652-
requiredVariableName := prefixName(pbName, "var-streaming-connection-timeouts")
1653-
suiteName := "kubelet-remediation-test-suite-node"
1635+
suiteName := framework.GetObjNameFromTest(t) + "-kubelet-remediation"
16541636

16551637
tp := &compv1alpha1.TailoredProfile{
16561638
ObjectMeta: metav1.ObjectMeta{
16571639
Name: suiteName,
16581640
Namespace: f.OperatorNamespace,
1641+
Annotations: map[string]string{
1642+
compv1alpha1.ProductTypeAnnotation: "Node",
1643+
},
16591644
},
16601645
Spec: compv1alpha1.TailoredProfileSpec{
16611646
Title: "kubelet-remediation-test-node",
16621647
Description: "A test tailored profile to test kubelet remediation",
16631648
EnableRules: []compv1alpha1.RuleReferenceSpec{
16641649
{
1665-
Name: requiredRuleName,
1650+
Name: "ocp4-kubelet-eviction-thresholds-set-hard-imagefs-inodesfree",
16661651
Rationale: "To be tested",
16671652
},
16681653
{
1669-
Name: requiredVersionRuleName,
1654+
Name: "ocp4-version-detect-in-ocp",
16701655
Rationale: "To be tested",
16711656
},
16721657
},
16731658
SetValues: []compv1alpha1.VariableValueSpec{
16741659
{
1675-
Name: requiredVariableName,
1660+
Name: "ocp4-var-kubelet-evictionhard-imagefs-inodesfree",
16761661
Rationale: "Value to be set",
1677-
Value: "8h0m0s",
1662+
Value: "1%",
16781663
},
16791664
},
16801665
},
@@ -1704,11 +1689,21 @@ func TestKubeletConfigRemediation(t *testing.T) {
17041689
},
17051690
}
17061691

1707-
err = f.Client.Create(context.TODO(), ssb, nil)
1692+
err := f.Client.Create(context.TODO(), ssb, nil)
17081693
if err != nil {
17091694
t.Fatal(err)
17101695
}
1711-
defer f.Client.Delete(context.TODO(), ssb)
1696+
// Cleanup: Delete ScanSettingBinding first, then wait for ComplianceSuite to be cascade-deleted
1697+
// This ensures proper cleanup before the next test runs
1698+
defer func() {
1699+
if err := f.Client.Delete(context.TODO(), ssb); err != nil {
1700+
t.Fatal(err)
1701+
return
1702+
}
1703+
if err := f.WaitForComplianceSuiteDeletion(suiteName, f.OperatorNamespace); err != nil {
1704+
t.Fatal(err)
1705+
}
1706+
}()
17121707

17131708
// Ensure that all the scans in the suite have finished and are marked as Done
17141709
err = f.WaitForSuiteScansStatus(f.OperatorNamespace, suiteName, compv1alpha1.PhaseDone, compv1alpha1.ResultNonCompliant)
@@ -1727,6 +1722,21 @@ func TestKubeletConfigRemediation(t *testing.T) {
17271722
t.Fatal(err)
17281723
}
17291724

1725+
// Cleanup: Unapply the remediation before test ends to prevent leaving nodes in a modified state
1726+
// This ensures the cluster is reset after the test completes
1727+
defer func() {
1728+
// Finally clean up by removing the remediation and waiting for the nodes to reboot one more time
1729+
err = f.UnApplyRemediationAndCheck(f.OperatorNamespace, remName, "worker")
1730+
if err != nil {
1731+
t.Fatal(err)
1732+
}
1733+
1734+
err = f.WaitForNodesToBeReady()
1735+
if err != nil {
1736+
t.Fatalf("failed waiting for nodes to reboot after unapplying MachineConfig: %s", err)
1737+
}
1738+
}()
1739+
17301740
err = f.ReRunScan(scanName, f.OperatorNamespace)
17311741
if err != nil {
17321742
t.Fatal(err)

0 commit comments

Comments
 (0)