From 404c2e77e30b00f484bc704ac89f1969aeb0026e Mon Sep 17 00:00:00 2001 From: Ricardo Pchevuzinske Katz Date: Thu, 5 Jun 2025 09:53:09 -0300 Subject: [PATCH] Validate the cache contains the created address before moving on IPAM controller relies on controller runtime client, which is cached, to list the existing address before allocating a new IP. When the cache is too slow to reflect the changes, we may end up with an IPAddressClaim reconciliation happening before the previous reconciliation address acknowledge by the cache, which may end with duplicated IP address allocations. This change enforces that the Reconcile function waits for the cached client to be aware of the IPAddress before allowing the reconciliation to move on to the next reconciliation request --- pkg/ipamutil/reconciler.go | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pkg/ipamutil/reconciler.go b/pkg/ipamutil/reconciler.go index 58c1d8c..8c44e48 100644 --- a/pkg/ipamutil/reconciler.go +++ b/pkg/ipamutil/reconciler.go @@ -3,6 +3,7 @@ package ipamutil import ( "context" "fmt" + "time" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -10,6 +11,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" kerrors "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/apimachinery/pkg/util/wait" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" ipamv1 "sigs.k8s.io/cluster-api/exp/ipam/api/v1beta1" clusterutil "sigs.k8s.io/cluster-api/util" @@ -245,6 +247,23 @@ func (r *ClaimReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ct return unwrapResult(res), err } + // We need to ensure the address is properly watched by controller-runtime client (as it is cached) before moving + // to the next reconciliation request. + // This is required, otherwise if the next "EnsureAddress" is called too fast, or the controller-runtime cached client informer + // is too slow, we may end up not seeing that the IP Address is being used and can provide duplicated IPs. + err = wait.PollUntilContextTimeout(ctx, 5*time.Millisecond, 5*time.Second, true, func(ctx context.Context) (bool, error) { + key := client.ObjectKeyFromObject(&address) + if err := r.Client.Get(ctx, key, &ipamv1.IPAddress{}); err != nil { + return false, client.IgnoreNotFound(err) + } + return true, nil + }) + + if err != nil { + log.Error(err, "failed waiting for IPAddress to be visible in the cache after create", "namespace", address.GetNamespace(), "name", address.GetName()) + return ctrl.Result{}, errors.Wrapf(err, "failed waiting for IPAddress %s/%s to be visible in the cache after create", address.GetNamespace(), address.GetName()) + } + log.Info(fmt.Sprintf("IPAddress %s/%s (%s) has been %s", address.Namespace, address.Name, address.Spec.Address, operationResult), "IPAddressClaim", fmt.Sprintf("%s/%s", claim.Namespace, claim.Name))