Skip to content

Commit 565dfe7

Browse files
committed
Fix issue 362 (#365)
1 parent 07571f9 commit 565dfe7

File tree

3 files changed

+109
-65
lines changed

3 files changed

+109
-65
lines changed

pkg/metrics/helpers.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
package metrics
2+
3+
import (
4+
"context"
5+
6+
corev1 "k8s.io/api/core/v1"
7+
"k8s.io/apimachinery/pkg/types"
8+
9+
"github.com/prometheus/client_golang/prometheus"
10+
)
11+
12+
func buildFullLabels(namespace, pod, container, containerType, imageURL, currentVersion, latestVersion string) prometheus.Labels {
13+
return prometheus.Labels{
14+
"namespace": namespace,
15+
"pod": pod,
16+
"container_type": containerType,
17+
"container": container,
18+
"image": imageURL,
19+
"current_version": currentVersion,
20+
"latest_version": latestVersion,
21+
}
22+
}
23+
24+
func buildLastUpdatedLabels(namespace, pod, container, containerType, imageURL string) prometheus.Labels {
25+
return prometheus.Labels{
26+
"namespace": namespace,
27+
"pod": pod,
28+
"container_type": containerType,
29+
"container": container,
30+
"image": imageURL,
31+
}
32+
}
33+
34+
func buildPodPartialLabels(namespace, pod string) prometheus.Labels {
35+
return prometheus.Labels{
36+
"namespace": namespace,
37+
"pod": pod,
38+
}
39+
}
40+
41+
func buildContainerPartialLabels(namespace, pod, container, containerType string) prometheus.Labels {
42+
return prometheus.Labels{
43+
"namespace": namespace,
44+
"pod": pod,
45+
"container": container,
46+
"container_type": containerType,
47+
}
48+
}
49+
50+
// This _should_ leverage the Controllers Cache
51+
func (m *Metrics) PodExists(ctx context.Context, ns, name string) bool {
52+
pod := &corev1.Pod{}
53+
err := m.cache.Get(ctx, types.NamespacedName{Name: name, Namespace: ns}, pod)
54+
return err == nil && pod.GetDeletionTimestamp() == nil
55+
}

pkg/metrics/metrics.go

Lines changed: 20 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@ import (
77

88
"github.com/sirupsen/logrus"
99

10-
corev1 "k8s.io/api/core/v1"
11-
"k8s.io/apimachinery/pkg/types"
1210
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
1311

1412
"github.com/prometheus/client_golang/prometheus"
@@ -17,6 +15,8 @@ import (
1715
ctrmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
1816
)
1917

18+
const MetricNamespace = "version_checker"
19+
2020
// Metrics is used to expose container image version checks as prometheus
2121
// metrics.
2222
type Metrics struct {
@@ -39,12 +39,13 @@ type Metrics struct {
3939
// func New(log *logrus.Entry, reg ctrmetrics.RegistererGatherer, kubeClient k8sclient.Client) *Metrics {
4040
func New(log *logrus.Entry, reg ctrmetrics.RegistererGatherer, cache k8sclient.Reader) *Metrics {
4141
// Attempt to register, but ignore errors
42+
// TODO: We should check for AlreadyRegisteredError err type here for better error handling
4243
_ = reg.Register(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
4344
_ = reg.Register(collectors.NewGoCollector())
4445

4546
containerImageVersion := promauto.With(reg).NewGaugeVec(
4647
prometheus.GaugeOpts{
47-
Namespace: "version_checker",
48+
Namespace: MetricNamespace,
4849
Name: "is_latest_version",
4950
Help: "Where the container in use is using the latest upstream registry version",
5051
},
@@ -54,7 +55,7 @@ func New(log *logrus.Entry, reg ctrmetrics.RegistererGatherer, cache k8sclient.R
5455
)
5556
containerImageChecked := promauto.With(reg).NewGaugeVec(
5657
prometheus.GaugeOpts{
57-
Namespace: "version_checker",
58+
Namespace: MetricNamespace,
5859
Name: "last_checked",
5960
Help: "Timestamp when the image was checked",
6061
},
@@ -64,15 +65,15 @@ func New(log *logrus.Entry, reg ctrmetrics.RegistererGatherer, cache k8sclient.R
6465
)
6566
containerImageDuration := promauto.With(reg).NewGaugeVec(
6667
prometheus.GaugeOpts{
67-
Namespace: "version_checker",
68+
Namespace: MetricNamespace,
6869
Name: "image_lookup_duration",
6970
Help: "Time taken to lookup version.",
7071
},
7172
[]string{"namespace", "pod", "container", "image"},
7273
)
7374
containerImageErrors := promauto.With(reg).NewCounterVec(
7475
prometheus.CounterOpts{
75-
Namespace: "version_checker",
76+
Namespace: MetricNamespace,
7677
Name: "image_failures_total",
7778
Help: "Total number of errors where the version-checker was unable to get the latest upstream registry version",
7879
},
@@ -104,12 +105,12 @@ func (m *Metrics) AddImage(namespace, pod, container, containerType, imageURL st
104105
}
105106

106107
m.containerImageVersion.With(
107-
m.buildFullLabels(namespace, pod, container, containerType, imageURL, currentVersion, latestVersion),
108+
buildFullLabels(namespace, pod, container, containerType, imageURL, currentVersion, latestVersion),
108109
).Set(isLatestF)
109110

110111
// Bump last updated timestamp
111112
m.containerImageChecked.With(
112-
m.buildLastUpdatedLabels(namespace, pod, container, containerType, imageURL),
113+
buildLastUpdatedLabels(namespace, pod, container, containerType, imageURL),
113114
).Set(float64(time.Now().Unix()))
114115
}
115116

@@ -118,20 +119,14 @@ func (m *Metrics) RemoveImage(namespace, pod, container, containerType string) {
118119
defer m.mu.Unlock()
119120
total := 0
120121

121-
total += m.containerImageVersion.DeletePartialMatch(
122-
m.buildPartialLabels(namespace, pod),
123-
)
124-
total += m.containerImageDuration.DeletePartialMatch(
125-
m.buildPartialLabels(namespace, pod),
126-
)
122+
labels := buildContainerPartialLabels(namespace, pod, container, containerType)
127123

128-
total += m.containerImageChecked.DeletePartialMatch(
129-
m.buildPartialLabels(namespace, pod),
130-
)
131-
total += m.containerImageErrors.DeletePartialMatch(
132-
m.buildPartialLabels(namespace, pod),
133-
)
134-
m.log.Infof("Removed %d metrics for image %s/%s/%s", total, namespace, pod, container)
124+
total += m.containerImageVersion.DeletePartialMatch(labels)
125+
total += m.containerImageDuration.DeletePartialMatch(labels)
126+
total += m.containerImageChecked.DeletePartialMatch(labels)
127+
total += m.containerImageErrors.DeletePartialMatch(labels)
128+
129+
m.log.Infof("Removed %d metrics for image %s/%s/%s (%s)", total, namespace, pod, container, containerType)
135130
}
136131

137132
func (m *Metrics) RemovePod(namespace, pod string) {
@@ -140,16 +135,16 @@ func (m *Metrics) RemovePod(namespace, pod string) {
140135

141136
total := 0
142137
total += m.containerImageVersion.DeletePartialMatch(
143-
m.buildPartialLabels(namespace, pod),
138+
buildPodPartialLabels(namespace, pod),
144139
)
145140
total += m.containerImageDuration.DeletePartialMatch(
146-
m.buildPartialLabels(namespace, pod),
141+
buildPodPartialLabels(namespace, pod),
147142
)
148143
total += m.containerImageChecked.DeletePartialMatch(
149-
m.buildPartialLabels(namespace, pod),
144+
buildPodPartialLabels(namespace, pod),
150145
)
151146
total += m.containerImageErrors.DeletePartialMatch(
152-
m.buildPartialLabels(namespace, pod),
147+
buildPodPartialLabels(namespace, pod),
153148
)
154149

155150
m.log.Infof("Removed %d metrics for pod %s/%s", total, namespace, pod)
@@ -182,39 +177,3 @@ func (m *Metrics) ReportError(namespace, pod, container, imageURL string) {
182177
namespace, pod, container, imageURL,
183178
).Inc()
184179
}
185-
186-
func (m *Metrics) buildFullLabels(namespace, pod, container, containerType, imageURL, currentVersion, latestVersion string) prometheus.Labels {
187-
return prometheus.Labels{
188-
"namespace": namespace,
189-
"pod": pod,
190-
"container_type": containerType,
191-
"container": container,
192-
"image": imageURL,
193-
"current_version": currentVersion,
194-
"latest_version": latestVersion,
195-
}
196-
}
197-
198-
func (m *Metrics) buildLastUpdatedLabels(namespace, pod, container, containerType, imageURL string) prometheus.Labels {
199-
return prometheus.Labels{
200-
"namespace": namespace,
201-
"pod": pod,
202-
"container_type": containerType,
203-
"container": container,
204-
"image": imageURL,
205-
}
206-
}
207-
208-
func (m *Metrics) buildPartialLabels(namespace, pod string) prometheus.Labels {
209-
return prometheus.Labels{
210-
"namespace": namespace,
211-
"pod": pod,
212-
}
213-
}
214-
215-
// This _should_ leverage the Controllers Cache
216-
func (m *Metrics) PodExists(ctx context.Context, ns, name string) bool {
217-
pod := &corev1.Pod{}
218-
err := m.cache.Get(ctx, types.NamespacedName{Name: name, Namespace: ns}, pod)
219-
return err == nil && pod.GetDeletionTimestamp() == nil
220-
}

pkg/metrics/metrics_test.go

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,15 @@ func TestCache(t *testing.T) {
3636
for i, typ := range []string{"init", "container"} {
3737
version := fmt.Sprintf("0.1.%d", i)
3838
mt, _ := m.containerImageVersion.GetMetricWith(
39-
m.buildFullLabels("namespace", "pod", "container", typ, "url", version, version),
39+
buildFullLabels("namespace", "pod", "container", typ, "url", version, version),
4040
)
4141
count := testutil.ToFloat64(mt)
4242
assert.Equal(t, count, float64(1), "Expected to get a metric for containerImageVersion")
4343
}
4444

4545
// as well as the lastUpdated...
4646
for _, typ := range []string{"init", "container"} {
47-
mt, err := m.containerImageChecked.GetMetricWith(m.buildLastUpdatedLabels("namespace", "pod", "container", typ, "url"))
47+
mt, err := m.containerImageChecked.GetMetricWith(buildLastUpdatedLabels("namespace", "pod", "container", typ, "url"))
4848
require.NoError(t, err)
4949
count := testutil.ToFloat64(mt)
5050
assert.GreaterOrEqual(t, count, float64(time.Now().Unix()))
@@ -58,14 +58,14 @@ func TestCache(t *testing.T) {
5858
for i, typ := range []string{"init", "container"} {
5959
version := fmt.Sprintf("0.1.%d", i)
6060
mt, _ := m.containerImageVersion.GetMetricWith(
61-
m.buildFullLabels("namespace", "pod", "container", typ, "url", version, version),
61+
buildFullLabels("namespace", "pod", "container", typ, "url", version, version),
6262
)
6363
count := testutil.ToFloat64(mt)
6464
assert.Equal(t, count, float64(0), "Expected NOT to get a metric for containerImageVersion")
6565
}
6666
// And the Last Updated is removed too
6767
for _, typ := range []string{"init", "container"} {
68-
mt, err := m.containerImageChecked.GetMetricWith(m.buildLastUpdatedLabels("namespace", "pod", "container", typ, "url"))
68+
mt, err := m.containerImageChecked.GetMetricWith(buildLastUpdatedLabels("namespace", "pod", "container", typ, "url"))
6969
require.NoError(t, err)
7070
count := testutil.ToFloat64(mt)
7171
assert.Equal(t, count, float64(0), "Expected to get a metric for containerImageChecked")
@@ -184,3 +184,33 @@ func Test_Metrics_SkipOnDeletedPod(t *testing.T) {
184184
assert.NotContains(t, *mf.Name, "image_failures_total", "Should not have been found: %+v", mf)
185185
}
186186
}
187+
188+
func TestPodAnnotationsChangeAfterRegistration(t *testing.T) {
189+
// Step 2: Create Metrics with fake registry
190+
reg := prometheus.NewRegistry()
191+
log := logrus.NewEntry(logrus.New())
192+
client := fake.NewClientBuilder().Build()
193+
metrics := New(log, reg, client)
194+
195+
// Register Metrics...
196+
metrics.AddImage("default", "mypod", "my-init-container", "init", "alpine:latest", false, "1.0", "1.1")
197+
metrics.AddImage("default", "mypod", "mycontainer", "container", "nginx:1.0", true, "1.0", "1.0")
198+
metrics.AddImage("default", "mypod", "sidecar", "container", "alpine:1.0", false, "1.0", "1.1")
199+
200+
_, err := reg.Gather()
201+
require.NoError(t, err, "Failed to gather metrics")
202+
203+
assert.Equal(t, 3,
204+
testutil.CollectAndCount(metrics.containerImageVersion.MetricVec, MetricNamespace+"_is_latest_version"),
205+
)
206+
207+
// Pod Annotations are changed, only the `mycontainer` should be checked...
208+
209+
// Remove Init and sidecar
210+
metrics.RemoveImage("default", "mypod", "my-init-container", "init")
211+
metrics.RemoveImage("default", "mypod", "sidecar", "container")
212+
213+
assert.Equal(t, 1,
214+
testutil.CollectAndCount(metrics.containerImageVersion.MetricVec, MetricNamespace+"_is_latest_version"),
215+
)
216+
}

0 commit comments

Comments
 (0)