From d006aae7c987e2f04af7d4ddb39925eb921105a7 Mon Sep 17 00:00:00 2001 From: David Collom Date: Tue, 1 Apr 2025 15:36:55 +0100 Subject: [PATCH] Track Kubernetes Channels for latest versions --- cmd/app/app.go | 16 +++- cmd/app/options.go | 18 +++- go.mod | 1 + go.sum | 2 + pkg/client/docker/docker.go | 2 + pkg/client/fallback/fallback.go | 4 +- pkg/client/util/http_backoff.go | 19 ++++ pkg/controller/kube_controller.go | 140 ++++++++++++++++++++++++++++++ pkg/metrics/kubernetes.go | 21 +++++ pkg/metrics/metrics.go | 48 +++++----- 10 files changed, 244 insertions(+), 27 deletions(-) create mode 100644 pkg/client/util/http_backoff.go create mode 100644 pkg/controller/kube_controller.go create mode 100644 pkg/metrics/kubernetes.go diff --git a/cmd/app/app.go b/cmd/app/app.go index e18d85f0..af61316f 100644 --- a/cmd/app/app.go +++ b/cmd/app/app.go @@ -110,15 +110,27 @@ func NewCommand(ctx context.Context) *cobra.Command { return fmt.Errorf("failed to setup image registry clients: %s", err) } - c := controller.NewPodReconciler(opts.CacheTimeout, + _ = client + + podController := controller.NewPodReconciler(opts.CacheTimeout, metricsServer, client, mgr.GetClient(), log, opts.DefaultTestAll, ) + if err := podController.SetupWithManager(mgr); err != nil { + return err + } - if err := c.SetupWithManager(mgr); err != nil { + kubeController := controller.NewKubeReconciler( + log, + mgr.GetConfig(), + metricsServer, + opts.KubeInterval, + opts.KubeChannel, + ) + if err := mgr.Add(kubeController); err != nil { return err } diff --git a/cmd/app/options.go b/cmd/app/options.go index b4bb191c..4e2a6b80 100644 --- a/cmd/app/options.go +++ b/cmd/app/options.go @@ -72,10 +72,16 @@ type Options struct { GracefulShutdownTimeout time.Duration CacheSyncPeriod time.Duration + KubeChannel string + KubeInterval time.Duration + + // kubeConfigFlags holds the flags for the kubernetes client kubeConfigFlags *genericclioptions.ConfigFlags - selfhosted selfhosted.Options + // Client holds the options for the image client(s) Client client.Options + // selfhosted holds the options for the selfhosted registry + selfhosted selfhosted.Options } func (o *Options) addFlags(cmd *cobra.Command) { @@ -133,7 +139,15 @@ func (o *Options) addAppFlags(fs *pflag.FlagSet) { fs.DurationVarP(&o.CacheSyncPeriod, "cache-sync-period", "", 5*time.Hour, - "The time in which all resources should be updated.") + "The duration in which all resources should be updated.") + + fs.DurationVarP(&o.KubeInterval, + "kube-interval", "", o.CacheSyncPeriod, + "The time in which kubernetes channels updates are checked.") + + fs.StringVarP(&o.KubeChannel, + "kube-channel", "", "stable", + "The Kubernetes channel to check against for cluster updates.") } func (o *Options) addAuthFlags(fs *pflag.FlagSet) { diff --git a/go.mod b/go.mod index 14584f84..391ad2fe 100644 --- a/go.mod +++ b/go.mod @@ -28,6 +28,7 @@ require ( ) require ( + github.com/Masterminds/semver/v3 v3.3.1 github.com/aws/aws-sdk-go-v2/config v1.29.12 github.com/aws/aws-sdk-go-v2/credentials v1.17.65 github.com/aws/aws-sdk-go-v2/service/ecr v1.43.0 diff --git a/go.sum b/go.sum index 1d17bdfe..52b36d6d 100644 --- a/go.sum +++ b/go.sum @@ -19,6 +19,8 @@ github.com/Azure/go-autorest/logger v0.2.2/go.mod h1:I5fg9K52o+iuydlWfa9T5K6WFos github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= github.com/Azure/go-autorest/tracing v0.6.1 h1:YUMSrC/CeD1ZnnXcNYU4a/fzsO35u2Fsful9L/2nyR0= github.com/Azure/go-autorest/tracing v0.6.1/go.mod h1:/3EgjbsjraOqiicERAeu3m7/z0x1TzjQGAwDrJrXGkc= +github.com/Masterminds/semver/v3 v3.3.1 h1:QtNSWtVZ3nBfk8mAOu/B6v7FMJ+NHTIgUPi7rj+4nv4= +github.com/Masterminds/semver/v3 v3.3.1/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/aws/aws-sdk-go-v2 v1.36.3 h1:mJoei2CxPutQVxaATCzDUjcZEjVRdpsiiXi2o38yqWM= github.com/aws/aws-sdk-go-v2 v1.36.3/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg= github.com/aws/aws-sdk-go-v2/config v1.29.12 h1:Y/2a+jLPrPbHpFkpAAYkVEtJmxORlXoo5k2g1fa2sUo= diff --git a/pkg/client/docker/docker.go b/pkg/client/docker/docker.go index 12e34628..93dc0913 100644 --- a/pkg/client/docker/docker.go +++ b/pkg/client/docker/docker.go @@ -14,6 +14,7 @@ import ( "github.com/hashicorp/go-retryablehttp" "github.com/jetstack/version-checker/pkg/api" + "github.com/jetstack/version-checker/pkg/client/util" ) const ( @@ -43,6 +44,7 @@ func New(opts Options, log *logrus.Entry) (*Client, error) { retryclient.RetryMax = 10 retryclient.RetryWaitMax = 2 * time.Minute retryclient.RetryWaitMin = 1 * time.Second + retryclient.Backoff = util.HTTPBackOff retryclient.Logger = log.WithField("client", "docker") client := retryclient.StandardClient() diff --git a/pkg/client/fallback/fallback.go b/pkg/client/fallback/fallback.go index 12fc0c52..b273c5bc 100644 --- a/pkg/client/fallback/fallback.go +++ b/pkg/client/fallback/fallback.go @@ -54,9 +54,9 @@ func (c *Client) Tags(ctx context.Context, host, repo, image string) (tags []api remaining := len(c.clients) - i - 1 if remaining == 0 { - c.log.Debugf("failed to lookup via %q, Giving up, no more clients", client.Name()) + c.log.Infof("failed to lookup via %q, Giving up, no more clients", client.Name()) } else { - c.log.Debugf("failed to lookup via %q, continuing to search with %v clients remaining", client.Name(), remaining) + c.log.Infof("failed to lookup via %q, continuing to search with %v clients remaining", client.Name(), remaining) } } diff --git a/pkg/client/util/http_backoff.go b/pkg/client/util/http_backoff.go new file mode 100644 index 00000000..ab65ed1c --- /dev/null +++ b/pkg/client/util/http_backoff.go @@ -0,0 +1,19 @@ +package util + +import ( + "net/http" + "time" + + "github.com/hashicorp/go-retryablehttp" +) + +// This is a custom Backoff that enforces the Max wait duration. +// If the sleep is greater we refuse to sleep at all +func HTTPBackOff(min, max time.Duration, attemptNum int, resp *http.Response) time.Duration { + sleep := retryablehttp.DefaultBackoff(min, max, attemptNum, resp) + if sleep <= max { + return sleep + } + + return 0 +} diff --git a/pkg/controller/kube_controller.go b/pkg/controller/kube_controller.go new file mode 100644 index 00000000..eb812280 --- /dev/null +++ b/pkg/controller/kube_controller.go @@ -0,0 +1,140 @@ +package controller + +import ( + "context" + "fmt" + "io" + "strings" + "time" + + "github.com/sirupsen/logrus" + + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + + "github.com/hashicorp/go-retryablehttp" + "github.com/jetstack/version-checker/pkg/metrics" + + "github.com/Masterminds/semver/v3" +) + +const channelURLSuffix = "https://dl.k8s.io/release/" + +type ClusterVersionScheduler struct { + client kubernetes.Interface + log *logrus.Entry + metrics *metrics.Metrics + interval time.Duration + channel string +} + +func NewKubeReconciler( + log *logrus.Entry, + config *rest.Config, + metrics *metrics.Metrics, + interval time.Duration, + channel string, +) *ClusterVersionScheduler { + + return &ClusterVersionScheduler{ + log: log, + client: kubernetes.NewForConfigOrDie(config), + interval: interval, + metrics: metrics, + channel: channel, + } +} + +func (s *ClusterVersionScheduler) Start(ctx context.Context) error { + go s.runScheduler(ctx) + return s.reconcile(ctx) +} + +func (s *ClusterVersionScheduler) runScheduler(ctx context.Context) { + ticker := time.NewTicker(s.interval) + defer ticker.Stop() + + s.log.WithField("interval", s.interval).WithField("channel", s.channel). + Info("ClusterVersionScheduler started") + + for { + select { + case <-ctx.Done(): + s.log.Info("ClusterVersionScheduler stopping") + return + case <-ticker.C: + if err := s.reconcile(ctx); err != nil { + s.log.Error(err, "Failed to reconcile cluster version") + } + } + } +} + +func (s *ClusterVersionScheduler) reconcile(_ context.Context) error { + // Get current cluster version + current, err := s.client.Discovery().ServerVersion() + if err != nil { + return fmt.Errorf("getting cluster version: %w", err) + } + + // Get latest stable version + latest, err := getLatestStableVersion(s.channel) + if err != nil { + return fmt.Errorf("fetching latest stable version: %w", err) + } + + latestSemVer, err := semver.NewVersion(latest) + if err != nil { + return err + } + currentSemVer, err := semver.NewVersion(current.GitVersion) + if err != nil { + return err + } + // Strip metadata from the versions + currentSemVerNoMeta, _ := currentSemVer.SetMetadata("") + latestSemVerNoMeta, _ := latestSemVer.SetMetadata("") + + // Register metrics! + s.metrics.RegisterKubeVersion(!currentSemVerNoMeta.LessThan(&latestSemVerNoMeta), + currentSemVerNoMeta.String(), latestSemVerNoMeta.String(), + s.channel, + ) + + s.log.WithFields(logrus.Fields{ + "currentVersion": currentSemVerNoMeta, + "latestStable": latestSemVerNoMeta, + "channel": s.channel, + }).Info("Cluster version check complete") + + return nil +} + +func getLatestStableVersion(channel string) (string, error) { + if !strings.HasSuffix(channel, ".txt") { + channel += ".txt" + } + + // We don't need a `/` here as its should be in the channelURLSuffix + channelURL := fmt.Sprintf("%s%s", channelURLSuffix, channel) + + client := retryablehttp.NewClient() + client.RetryMax = 3 + client.RetryWaitMin = 1 * time.Second + client.RetryWaitMax = 30 * time.Second + // Optional: Log using your own logrus/logr logger + client.Logger = nil + + resp, err := client.Get(channelURL) + if err != nil { + return "", err + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + + return strings.TrimSpace(string(body)), nil +} diff --git a/pkg/metrics/kubernetes.go b/pkg/metrics/kubernetes.go new file mode 100644 index 00000000..c4f326d8 --- /dev/null +++ b/pkg/metrics/kubernetes.go @@ -0,0 +1,21 @@ +package metrics + +import "github.com/prometheus/client_golang/prometheus" + +func (m *Metrics) RegisterKubeVersion(isLatest bool, currentVersion, latestVersion, channel string) { + m.mu.Lock() + defer m.mu.Unlock() + + isLatestF := 0.0 + if isLatest { + isLatestF = 1.0 + } + + m.kubernetesVersion.With( + prometheus.Labels{ + "current_version": currentVersion, + "latest_version": latestVersion, + "channel": channel, + }, + ).Set(isLatestF) +} diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 09aa10ee..c6690a90 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -28,6 +28,9 @@ type Metrics struct { containerImageDuration *prometheus.GaugeVec containerImageErrors *prometheus.CounterVec + // Kubernetes version metric + kubernetesVersion *prometheus.GaugeVec + cache k8sclient.Reader // Contains all metrics for the roundtripper @@ -80,6 +83,16 @@ func New(log *logrus.Entry, reg ctrmetrics.RegistererGatherer, cache k8sclient.R "namespace", "pod", "container", "image", }, ) + kubernetesVersion := promauto.With(reg).NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "version_checker", + Name: "is_latest_kube_version", + Help: "Where the current cluster is using the latest release channel version", + }, + []string{ + "current_version", "latest_version", "channel", + }, + ) return &Metrics{ log: log.WithField("module", "metrics"), @@ -90,6 +103,7 @@ func New(log *logrus.Entry, reg ctrmetrics.RegistererGatherer, cache k8sclient.R containerImageDuration: containerImageDuration, containerImageChecked: containerImageChecked, containerImageErrors: containerImageErrors, + kubernetesVersion: kubernetesVersion, roundTripper: NewRoundTripper(reg), } } @@ -113,15 +127,11 @@ func (m *Metrics) AddImage(namespace, pod, container, containerType, imageURL st ).Set(float64(time.Now().Unix())) } -func (m *Metrics) RemoveImage(namespace, pod, container, containerType string) { - m.mu.Lock() - defer m.mu.Unlock() - total := 0 - - total += m.containerImageVersion.DeletePartialMatch( +func (m *Metrics) CleanUpMetrics(namespace, pod string) (total int) { + total += m.containerImageDuration.DeletePartialMatch( m.buildPartialLabels(namespace, pod), ) - total += m.containerImageDuration.DeletePartialMatch( + total += m.containerImageChecked.DeletePartialMatch( m.buildPartialLabels(namespace, pod), ) @@ -131,6 +141,15 @@ func (m *Metrics) RemoveImage(namespace, pod, container, containerType string) { total += m.containerImageErrors.DeletePartialMatch( m.buildPartialLabels(namespace, pod), ) + return total +} + +func (m *Metrics) RemoveImage(namespace, pod, container, containerType string) { + m.mu.Lock() + defer m.mu.Unlock() + + total := m.CleanUpMetrics(namespace, pod) + m.log.Infof("Removed %d metrics for image %s/%s/%s", total, namespace, pod, container) } @@ -138,20 +157,7 @@ func (m *Metrics) RemovePod(namespace, pod string) { m.mu.Lock() defer m.mu.Unlock() - total := 0 - total += m.containerImageVersion.DeletePartialMatch( - m.buildPartialLabels(namespace, pod), - ) - total += m.containerImageDuration.DeletePartialMatch( - m.buildPartialLabels(namespace, pod), - ) - total += m.containerImageChecked.DeletePartialMatch( - m.buildPartialLabels(namespace, pod), - ) - total += m.containerImageErrors.DeletePartialMatch( - m.buildPartialLabels(namespace, pod), - ) - + total := m.CleanUpMetrics(namespace, pod) m.log.Infof("Removed %d metrics for pod %s/%s", total, namespace, pod) }