Skip to content

Commit 00e0834

Browse files
Merge pull request #870 from GoogleCloudPlatform/metric-cache
Merge custom metrics stackdriver adapter metric caches into main branch
2 parents 416eb1d + 38265e8 commit 00e0834

File tree

6 files changed

+632
-4
lines changed

6 files changed

+632
-4
lines changed

custom-metrics-stackdriver-adapter/adapter.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ import (
4848
basecmd "sigs.k8s.io/custom-metrics-apiserver/pkg/cmd"
4949
)
5050

51+
const defaultExternalMetricsCacheSize = 300
52+
5153
// StackdriverAdapter is an adapter for Stackdriver
5254
type StackdriverAdapter struct {
5355
basecmd.AdapterBase
@@ -74,6 +76,10 @@ type stackdriverAdapterServerOptions struct {
7476
// ListFullCustomMetrics is a flag that whether list all pod custom metrics during api discovery.
7577
// Default = false, which only list 1 metric. Enabling this back would increase memory usage.
7678
ListFullCustomMetrics bool
79+
// ExternalMetricsCacheTTL specifies the cache expiration time for external metrics.
80+
ExternalMetricsCacheTTL time.Duration
81+
// ExternalMetricsCacheSize specifies the maximum number of stored entries in the external metric cache.
82+
ExternalMetricsCacheSize int
7783
}
7884

7985
func (sa *StackdriverAdapter) makeProviderOrDie(o *stackdriverAdapterServerOptions, rateInterval time.Duration, alignmentPeriod time.Duration) (provider.MetricsProvider, *translator.Translator) {
@@ -122,7 +128,7 @@ func (sa *StackdriverAdapter) makeProviderOrDie(o *stackdriverAdapterServerOptio
122128

123129
// If ListFullCustomMetrics is false, it returns one resource during api discovery `kubectl get --raw "/apis/custom.metrics.k8s.io/v1beta2"` to reduce memory usage.
124130
customMetricsListCache := listStackdriverCustomMetrics(translator, o.ListFullCustomMetrics, o.FallbackForContainerMetrics)
125-
return adapter.NewStackdriverProvider(client, mapper, gceConf, stackdriverService, translator, rateInterval, o.UseNewResourceModel, o.FallbackForContainerMetrics, customMetricsListCache), translator
131+
return adapter.NewStackdriverProvider(client, mapper, gceConf, stackdriverService, translator, rateInterval, o.UseNewResourceModel, o.FallbackForContainerMetrics, customMetricsListCache, o.ExternalMetricsCacheTTL, o.ExternalMetricsCacheSize), translator
126132
}
127133

128134
func listStackdriverCustomMetrics(translator *translator.Translator, listFullCustomMetrics bool, fallbackForContainerMetrics bool) []provider.CustomMetricInfo {
@@ -191,6 +197,7 @@ func main() {
191197
EnableCoreMetricsAPI: false,
192198
EnableDistributionSupport: false,
193199
ListFullCustomMetrics: false,
200+
ExternalMetricsCacheSize: defaultExternalMetricsCacheSize,
194201
}
195202

196203
flags.BoolVar(&serverOptions.UseNewResourceModel, "use-new-resource-model", serverOptions.UseNewResourceModel,
@@ -211,6 +218,10 @@ func main() {
211218
"Stackdriver Endpoint used by adapter. Default is https://monitoring.googleapis.com/")
212219
flags.BoolVar(&serverOptions.EnableDistributionSupport, "enable-distribution-support", serverOptions.EnableDistributionSupport,
213220
"enables support for scaling based on distribution values")
221+
flags.DurationVar(&serverOptions.ExternalMetricsCacheTTL, "external-metric-cache-ttl", serverOptions.ExternalMetricsCacheTTL,
222+
"The duration (e.g., 1m, 5s) for which external metric values are cached.")
223+
flags.IntVar(&serverOptions.ExternalMetricsCacheSize, "external-metric-cache-size", serverOptions.ExternalMetricsCacheSize,
224+
"The maximum number of entries in the external metric cache.")
214225

215226
flags.Parse(os.Args)
216227

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package provider
18+
19+
import (
20+
"time"
21+
22+
utilcache "k8s.io/apimachinery/pkg/util/cache"
23+
"k8s.io/metrics/pkg/apis/external_metrics"
24+
"sigs.k8s.io/custom-metrics-apiserver/pkg/provider"
25+
)
26+
27+
// externalMetricsCache encapsulates the cache for external metrics.
28+
type externalMetricsCache struct {
29+
cache *utilcache.LRUExpireCache
30+
ttl time.Duration
31+
}
32+
33+
// Cache key for GetExternalMetric requests.
34+
type cacheKey struct {
35+
namespace string
36+
metricSelector string
37+
info provider.ExternalMetricInfo
38+
}
39+
40+
func newExternalMetricsCache(size int, ttl time.Duration) *externalMetricsCache {
41+
return &externalMetricsCache{
42+
cache: utilcache.NewLRUExpireCache(size),
43+
ttl: ttl,
44+
}
45+
}
46+
47+
func (c *externalMetricsCache) get(key cacheKey) (*external_metrics.ExternalMetricValueList, bool) {
48+
if c.cache == nil {
49+
return nil, false
50+
}
51+
cachedValue, ok := c.cache.Get(key)
52+
if !ok {
53+
return nil, false
54+
}
55+
metrics, castOk := cachedValue.(*external_metrics.ExternalMetricValueList)
56+
if !castOk {
57+
// Remove corrupt entry
58+
c.cache.Remove(key)
59+
return nil, false
60+
}
61+
62+
return metrics, true
63+
}
64+
65+
func (c *externalMetricsCache) add(key cacheKey, value *external_metrics.ExternalMetricValueList) {
66+
if c.cache == nil {
67+
return
68+
}
69+
c.cache.Add(key, value, c.ttl)
70+
}
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package provider
18+
19+
import (
20+
"testing"
21+
"time"
22+
23+
"github.com/google/go-cmp/cmp"
24+
"k8s.io/apimachinery/pkg/api/resource"
25+
"k8s.io/metrics/pkg/apis/external_metrics"
26+
"sigs.k8s.io/custom-metrics-apiserver/pkg/provider"
27+
)
28+
29+
func TestExternalMetricCache(t *testing.T) {
30+
key1 := cacheKey{
31+
namespace: "ns1",
32+
metricSelector: "selector1",
33+
info: provider.ExternalMetricInfo{Metric: "metric1"},
34+
}
35+
36+
key2 := cacheKey{
37+
namespace: "ns2",
38+
metricSelector: "selector2",
39+
info: provider.ExternalMetricInfo{Metric: "metric2"},
40+
}
41+
42+
key3 := cacheKey{
43+
namespace: "ns3",
44+
metricSelector: "selector3",
45+
info: provider.ExternalMetricInfo{Metric: "metric3"},
46+
}
47+
48+
value1 := &external_metrics.ExternalMetricValueList{
49+
Items: []external_metrics.ExternalMetricValue{{MetricName: "metric1", Value: *resource.NewQuantity(100, resource.DecimalSI)}},
50+
}
51+
52+
value2 := &external_metrics.ExternalMetricValueList{
53+
Items: []external_metrics.ExternalMetricValue{{MetricName: "metric2", Value: *resource.NewQuantity(200, resource.DecimalSI)}},
54+
}
55+
56+
value3 := &external_metrics.ExternalMetricValueList{
57+
Items: []external_metrics.ExternalMetricValue{{MetricName: "metric3", Value: *resource.NewQuantity(300, resource.DecimalSI)}},
58+
}
59+
60+
tests := []struct {
61+
name string
62+
cacheSize int
63+
cacheTTL time.Duration
64+
setup func(cache *externalMetricsCache)
65+
key cacheKey
66+
wantValue *external_metrics.ExternalMetricValueList
67+
wantFound bool
68+
sleep time.Duration
69+
}{
70+
{
71+
name: "cache hit",
72+
cacheSize: 5,
73+
cacheTTL: 1 * time.Second,
74+
setup: func(cache *externalMetricsCache) {
75+
cache.add(key1, value1)
76+
},
77+
key: key1,
78+
wantValue: value1,
79+
wantFound: true,
80+
},
81+
{
82+
name: "cache miss",
83+
cacheSize: 5,
84+
cacheTTL: 1 * time.Second,
85+
setup: func(cache *externalMetricsCache) {},
86+
key: key2,
87+
wantValue: nil,
88+
wantFound: false,
89+
},
90+
{
91+
name: "cache expired",
92+
cacheSize: 5,
93+
cacheTTL: 1 * time.Second,
94+
setup: func(cache *externalMetricsCache) {
95+
cache.add(key1, value1)
96+
},
97+
key: key1,
98+
wantValue: nil,
99+
wantFound: false,
100+
sleep: 2 * time.Second,
101+
},
102+
{
103+
name: "cache not expired with longer TTL",
104+
cacheSize: 5,
105+
cacheTTL: 10 * time.Second,
106+
setup: func(cache *externalMetricsCache) {
107+
cache.add(key1, value1)
108+
},
109+
key: key1,
110+
wantValue: value1,
111+
wantFound: true,
112+
sleep: 2 * time.Second,
113+
},
114+
{
115+
name: "cache hit after set",
116+
cacheSize: 5,
117+
cacheTTL: 1 * time.Second,
118+
setup: func(cache *externalMetricsCache) {
119+
cache.add(key2, value2)
120+
},
121+
key: key2,
122+
wantValue: value2,
123+
wantFound: true,
124+
},
125+
{
126+
name: "cache hit after set and get",
127+
cacheSize: 5,
128+
cacheTTL: 1 * time.Second,
129+
setup: func(cache *externalMetricsCache) {
130+
cache.add(key1, value1)
131+
cache.get(key1) // Simulate a previous get
132+
},
133+
key: key1,
134+
wantValue: value1,
135+
wantFound: true,
136+
},
137+
{
138+
name: "cache eviction",
139+
cacheSize: 2,
140+
cacheTTL: 1 * time.Minute,
141+
setup: func(cache *externalMetricsCache) {
142+
cache.add(key1, value1)
143+
cache.add(key2, value2)
144+
// Trigger eviction
145+
cache.add(key3, value3)
146+
},
147+
key: key1,
148+
wantValue: nil,
149+
},
150+
}
151+
152+
for _, tt := range tests {
153+
t.Run(tt.name, func(t *testing.T) {
154+
cache := newExternalMetricsCache(tt.cacheSize, tt.cacheTTL)
155+
tt.setup(cache)
156+
if tt.sleep > 0 {
157+
time.Sleep(tt.sleep)
158+
}
159+
gotValue, gotFound := cache.get(tt.key)
160+
161+
if gotFound != tt.wantFound {
162+
t.Errorf("Get() gotFound = %v, want %v", gotFound, tt.wantFound)
163+
}
164+
165+
if diff := cmp.Diff(tt.wantValue, gotValue); diff != "" {
166+
t.Errorf("Get() mismatch (-want +got):\n%s", diff)
167+
}
168+
})
169+
}
170+
}

custom-metrics-stackdriver-adapter/pkg/adapter/provider/provider.go

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,11 @@ type StackdriverProvider struct {
5656
useNewResourceModel bool
5757
metricsCache []provider.CustomMetricInfo
5858
fallbackForContainerMetrics bool
59+
externalMetricsCache *externalMetricsCache
5960
}
6061

6162
// NewStackdriverProvider creates a StackdriverProvider
62-
func NewStackdriverProvider(kubeClient *corev1.CoreV1Client, mapper apimeta.RESTMapper, gceConf *config.GceConfig, stackdriverService *stackdriver.Service, translator *translator.Translator, rateInterval time.Duration, useNewResourceModel bool, fallbackForContainerMetrics bool, customMetricsListCache []provider.CustomMetricInfo) provider.MetricsProvider {
63+
func NewStackdriverProvider(kubeClient *corev1.CoreV1Client, mapper apimeta.RESTMapper, gceConf *config.GceConfig, stackdriverService *stackdriver.Service, translator *translator.Translator, rateInterval time.Duration, useNewResourceModel bool, fallbackForContainerMetrics bool, customMetricsListCache []provider.CustomMetricInfo, externalMetricCacheTTL time.Duration, externalMetricCacheSize int) provider.MetricsProvider {
6364
p := &StackdriverProvider{
6465
kubeClient: kubeClient,
6566
stackdriverService: stackdriverService,
@@ -71,6 +72,13 @@ func NewStackdriverProvider(kubeClient *corev1.CoreV1Client, mapper apimeta.REST
7172
metricsCache: customMetricsListCache,
7273
}
7374

75+
if externalMetricCacheTTL > 0 {
76+
p.externalMetricsCache = newExternalMetricsCache(externalMetricCacheSize, externalMetricCacheTTL)
77+
klog.Infof("Started stackdriver provider with cache size: %d, cache TTL: %v", externalMetricCacheSize, externalMetricCacheTTL)
78+
} else {
79+
klog.Info("Stackdriver provider external metric cache is disabled. To enable the cache, ensure that --external-metric-cache-ttl is provided with non-zero value")
80+
}
81+
7482
return p
7583
}
7684

@@ -316,6 +324,21 @@ func (p *StackdriverProvider) ListAllMetrics() []provider.CustomMetricInfo {
316324

317325
// GetExternalMetric queries Stackdriver for external metrics.
318326
func (p *StackdriverProvider) GetExternalMetric(ctx context.Context, namespace string, metricSelector labels.Selector, info provider.ExternalMetricInfo) (*external_metrics.ExternalMetricValueList, error) {
327+
if p.externalMetricsCache != nil {
328+
key := cacheKey{
329+
namespace: namespace,
330+
metricSelector: metricSelector.String(),
331+
info: info,
332+
}
333+
if cachedValue, ok := p.externalMetricsCache.get(key); ok {
334+
return cachedValue, nil
335+
}
336+
}
337+
338+
// Proceed to do a fresh fetch for metrics since one of the following is true at this point
339+
// a) externalMetricCache is disabled
340+
// b) the key was never added to the cache
341+
// c) the key was in the cache, but its corrupt or its TTL has expired
319342
metricNameEscaped := info.Metric
320343
metricName := getExternalMetricName(metricNameEscaped)
321344
metricKind, metricValueType, err := p.translator.GetMetricKind(metricName, metricSelector)
@@ -334,9 +357,21 @@ func (p *StackdriverProvider) GetExternalMetric(ctx context.Context, namespace s
334357
if err != nil {
335358
return nil, err
336359
}
337-
return &external_metrics.ExternalMetricValueList{
360+
361+
resp := &external_metrics.ExternalMetricValueList{
338362
Items: externalMetricItems,
339-
}, nil
363+
}
364+
365+
if p.externalMetricsCache != nil {
366+
key := cacheKey{
367+
namespace: namespace,
368+
metricSelector: metricSelector.String(),
369+
info: info,
370+
}
371+
p.externalMetricsCache.add(key, resp)
372+
}
373+
374+
return resp, nil
340375
}
341376

342377
// ListAllExternalMetrics returns a list of available external metrics.

0 commit comments

Comments
 (0)