Skip to content

Commit 2732d31

Browse files
committed
feat: Thanos Querier to Thanos sidecar mTLS
1 parent 150b708 commit 2732d31

File tree

6 files changed

+237
-0
lines changed

6 files changed

+237
-0
lines changed

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ require (
88
github.com/go-logr/logr v1.4.2
99
github.com/google/go-cmp v0.6.0
1010
github.com/openshift/api v0.0.0-20240301093301-ce10821dc999
11+
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8
1112
github.com/pkg/errors v0.9.1
1213
github.com/prometheus/common v0.55.0
1314
github.com/rhobs/obo-prometheus-operator v0.74.0-rhobs1
@@ -20,6 +21,7 @@ require (
2021
k8s.io/api v0.30.2
2122
k8s.io/apiextensions-apiserver v0.30.2
2223
k8s.io/apimachinery v0.30.2
24+
k8s.io/apiserver v0.30.2
2325
k8s.io/client-go v0.30.2
2426
k8s.io/component-base v0.30.2
2527
k8s.io/utils v0.0.0-20240310230437-4693a0247e57

go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,8 @@ github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrB
373373
github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
374374
github.com/openshift/api v0.0.0-20240301093301-ce10821dc999 h1:+S998xHiJApsJZjRAO8wyedU9GfqFd8mtwWly6LqHDo=
375375
github.com/openshift/api v0.0.0-20240301093301-ce10821dc999/go.mod h1:CxgbWAlvu2iQB0UmKTtRu1YfepRg1/vJ64n2DlIEVz4=
376+
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8 h1:dKtHGYiOwl0DKZEWBW4MFWFS6IYW02AVD1WSuUAVwEo=
377+
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8/go.mod h1:ePlaOqUiPplRc++6aYdMe+2FmXb2xTNS9Nz5laG2YmI=
376378
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
377379
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
378380
github.com/ovh/go-ovh v1.4.3 h1:Gs3V823zwTFpzgGLZNI6ILS4rmxZgJwJCz54Er9LwD0=
@@ -802,6 +804,8 @@ k8s.io/apiextensions-apiserver v0.30.2 h1:l7Eue2t6QiLHErfn2vwK4KgF4NeDgjQkCXtEbO
802804
k8s.io/apiextensions-apiserver v0.30.2/go.mod h1:lsJFLYyK40iguuinsb3nt+Sj6CmodSI4ACDLep1rgjw=
803805
k8s.io/apimachinery v0.30.2 h1:fEMcnBj6qkzzPGSVsAZtQThU62SmQ4ZymlXRC5yFSCg=
804806
k8s.io/apimachinery v0.30.2/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc=
807+
k8s.io/apiserver v0.30.2 h1:ACouHiYl1yFI2VFI3YGM+lvxgy6ir4yK2oLOsLI1/tw=
808+
k8s.io/apiserver v0.30.2/go.mod h1:BOTdFBIch9Sv0ypSEcUR6ew/NUFGocRFNl72Ra7wTm8=
805809
k8s.io/client-go v0.30.2 h1:sBIVJdojUNPDU/jObC+18tXWcTJVcwyqS9diGdWHk50=
806810
k8s.io/client-go v0.30.2/go.mod h1:JglKSWULm9xlJLx4KCkfLLQ7XwtlbflV6uFFSHTMgVs=
807811
k8s.io/component-base v0.30.2 h1:pqGBczYoW1sno8q9ObExUqrYSKhtE5rW3y6gX88GZII=

pkg/assets/certificate_generator.go

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
package assets
2+
3+
import (
4+
"crypto/rand"
5+
"crypto/x509"
6+
"fmt"
7+
"math/big"
8+
"time"
9+
10+
"github.com/go-logr/logr"
11+
12+
"github.com/openshift/library-go/pkg/crypto"
13+
v1 "k8s.io/api/core/v1"
14+
"k8s.io/apimachinery/pkg/util/sets"
15+
"k8s.io/apiserver/pkg/authentication/user"
16+
)
17+
18+
const certificateLifetime = time.Duration(crypto.DefaultCertificateLifetimeInDays) * 24 * time.Hour
19+
const GRPCSecretName = "thanos-grpc-secret"
20+
21+
// needsNewCert reports whether a certificate valid from notBefore to notAfter
// has entered the last fifth of its validity window, as seen by the clock now.
//
// Taken from
// https://github.com/openshift/library-go/blob/08c2fd1b452520da35ad210930ea9d100545589a/pkg/operator/certrotation/signer.go#L68-L86
// without the refresh time handling.
func needsNewCert(notBefore, notAfter time.Time, now func() time.Time) bool {
	validity := notAfter.Sub(notBefore)
	// Rotation becomes due once only 1/5 of the validity window is left.
	rotationDue := notAfter.Add(-(validity / 5))
	return rotationDue.Before(now())
}
29+
30+
// Taken from
31+
// https://github.com/openshift/cluster-monitoring-operator/blob/765d0b0369b176a5997d787b6710783437172879/pkg/manifests/tls.go#L113
32+
func RotateGRPCSecret(s *v1.Secret, logger logr.Logger) (bool, error) {
33+
var (
34+
curCA, newCA *crypto.CA
35+
curCABytes, crtPresent = s.Data["ca.crt"]
36+
curCAKeyBytes, keyPresent = s.Data["ca.key"]
37+
rotate = !crtPresent || !keyPresent
38+
)
39+
40+
if crtPresent && keyPresent {
41+
var err error
42+
curCA, err = crypto.GetCAFromBytes(curCABytes, curCAKeyBytes)
43+
if err != nil {
44+
logger.Info(fmt.Sprintf("generating a new CA due to error reading CA: %v", err))
45+
rotate = true
46+
} else if needsNewCert(curCA.Config.Certs[0].NotBefore, curCA.Config.Certs[0].NotAfter, time.Now) {
47+
logger.Info("generating new CA, because the current one is older than 1/5 of it validity timestamp")
48+
rotate = true
49+
}
50+
}
51+
52+
if !rotate {
53+
return rotate, nil
54+
}
55+
56+
if curCA == nil {
57+
newCAConfig, err := crypto.MakeSelfSignedCAConfig(
58+
fmt.Sprintf("%s@%d", "openshift-cluster-monitoring", time.Now().Unix()),
59+
crypto.DefaultCertificateLifetimeInDays,
60+
)
61+
if err != nil {
62+
return rotate, fmt.Errorf("error generating self signed CA: %w", err)
63+
}
64+
65+
newCA = &crypto.CA{
66+
SerialGenerator: &crypto.RandomSerialGenerator{},
67+
Config: newCAConfig,
68+
}
69+
} else {
70+
template := curCA.Config.Certs[0]
71+
now := time.Now()
72+
template.NotBefore = now.Add(-1 * time.Second)
73+
template.NotAfter = now.Add(certificateLifetime)
74+
template.SerialNumber = template.SerialNumber.Add(template.SerialNumber, big.NewInt(1))
75+
76+
newCACert, err := createCertificate(template, template, template.PublicKey, curCA.Config.Key)
77+
if err != nil {
78+
return rotate, fmt.Errorf("error rotating CA: %w", err)
79+
}
80+
81+
newCA = &crypto.CA{
82+
SerialGenerator: &crypto.RandomSerialGenerator{},
83+
Config: &crypto.TLSCertificateConfig{
84+
Certs: []*x509.Certificate{newCACert},
85+
Key: curCA.Config.Key,
86+
},
87+
}
88+
}
89+
90+
newCABytes, newCAKeyBytes, err := newCA.Config.GetPEMBytes()
91+
if err != nil {
92+
return rotate, fmt.Errorf("error getting PEM bytes from CA: %w", err)
93+
}
94+
95+
s.Data["ca.crt"] = newCABytes
96+
s.Data["ca.key"] = newCAKeyBytes
97+
98+
{
99+
cfg, err := newCA.MakeClientCertificateForDuration(
100+
&user.DefaultInfo{
101+
Name: "thanos-querier",
102+
},
103+
time.Duration(crypto.DefaultCertificateLifetimeInDays)*24*time.Hour,
104+
)
105+
if err != nil {
106+
return rotate, fmt.Errorf("error making client certificate: %w", err)
107+
}
108+
109+
crt, key, err := cfg.GetPEMBytes()
110+
if err != nil {
111+
return rotate, fmt.Errorf("error getting PEM bytes for thanos querier client certificate: %w", err)
112+
}
113+
s.Data["thanos-querier-client.crt"] = crt
114+
s.Data["thanos-querier-client.key"] = key
115+
}
116+
117+
{
118+
cfg, err := newCA.MakeServerCert(
119+
sets.NewString("prometheus-grpc"),
120+
crypto.DefaultCertificateLifetimeInDays,
121+
)
122+
if err != nil {
123+
return rotate, fmt.Errorf("error making server certificate: %w", err)
124+
}
125+
126+
crt, key, err := cfg.GetPEMBytes()
127+
if err != nil {
128+
return rotate, fmt.Errorf("error getting PEM bytes for prometheus-k8s server certificate: %w", err)
129+
}
130+
s.Data["prometheus-server.crt"] = crt
131+
s.Data["prometheus-server.key"] = key
132+
}
133+
134+
return rotate, nil
135+
}
136+
137+
// createCertificate signs template with priv on behalf of parent, embedding
// the public key pub, and returns the result parsed back into an
// *x509.Certificate.
//
// The arguments are handed to x509.CreateCertificate unchanged. An error is
// returned if signing fails or if the freshly created DER bytes cannot be
// parsed.
func createCertificate(template, parent *x509.Certificate, pub, priv interface{}) (*x509.Certificate, error) {
	rawCert, err := x509.CreateCertificate(rand.Reader, template, parent, pub, priv)
	if err != nil {
		return nil, fmt.Errorf("error creating certificate: %w", err)
	}
	// Exactly one certificate is expected, so parse a single one instead of
	// parsing a list and indexing into it unchecked.
	cert, err := x509.ParseCertificate(rawCert)
	if err != nil {
		return nil, fmt.Errorf("error parsing certificate: %w", err)
	}
	return cert, nil
}

pkg/controllers/monitoring/monitoring-stack/components.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313

1414
stack "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
1515
"github.com/rhobs/observability-operator/pkg/reconciler"
16+
"github.com/rhobs/observability-operator/pkg/assets"
1617
)
1718

1819
const AdditionalScrapeConfigsSelfScrapeKey = "self-scrape-config"
@@ -189,12 +190,33 @@ func newPrometheus(
189190
}
190191
return []monv1.EnableFeature{}
191192
}(),
193+
Volumes: []corev1.Volume{
194+
{
195+
Name: "thanos-tls-assets",
196+
VolumeSource: corev1.VolumeSource{
197+
Secret: &corev1.SecretVolumeSource{
198+
SecretName: assets.GRPCSecretName,
199+
},
200+
},
201+
},
202+
},
192203
},
193204
Retention: ms.Spec.Retention,
194205
RuleSelector: prometheusSelector,
195206
RuleNamespaceSelector: ms.Spec.NamespaceSelector,
196207
Thanos: &monv1.ThanosSpec{
197208
Image: ptr.To(thanosCfg.Image),
209+
GRPCServerTLSConfig: &monv1.TLSConfig{
210+
CAFile: "/etc/thanos/tls-assets/ca.crt",
211+
CertFile: "/etc/thanos/tls-assets/prometheus-server.crt",
212+
KeyFile: "/etc/thanos/tls-assets/prometheus-server.key",
213+
},
214+
VolumeMounts: []corev1.VolumeMount{
215+
{
216+
Name: "thanos-tls-assets",
217+
MountPath: "/etc/thanos/tls-assets",
218+
},
219+
},
198220
},
199221
},
200222
}

pkg/controllers/monitoring/monitoring-stack/controller.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,17 @@ import (
2828
policyv1 "k8s.io/api/policy/v1"
2929
rbacv1 "k8s.io/api/rbac/v1"
3030
"k8s.io/apimachinery/pkg/api/errors"
31+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3132
"k8s.io/apimachinery/pkg/runtime"
33+
"k8s.io/apimachinery/pkg/types"
3234
ctrl "sigs.k8s.io/controller-runtime"
3335
"sigs.k8s.io/controller-runtime/pkg/builder"
3436
"sigs.k8s.io/controller-runtime/pkg/client"
3537
"sigs.k8s.io/controller-runtime/pkg/controller"
3638
"sigs.k8s.io/controller-runtime/pkg/predicate"
3739

3840
stack "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
41+
"github.com/rhobs/observability-operator/pkg/assets"
3942
)
4043

4144
type resourceManager struct {
@@ -133,6 +136,42 @@ func RegisterWithManager(mgr ctrl.Manager, opts Options) error {
133136
func (rm resourceManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
134137
logger := rm.logger.WithValues("stack", req.NamespacedName)
135138
logger.Info("Reconciling monitoring stack")
139+
140+
gRPCSecret := v1.Secret{
141+
TypeMeta: metav1.TypeMeta{
142+
APIVersion: v1.SchemeGroupVersion.String(),
143+
Kind: "Secret",
144+
},
145+
ObjectMeta: metav1.ObjectMeta{
146+
Name: assets.GRPCSecretName,
147+
Namespace: req.Namespace,
148+
},
149+
Data: map[string][]byte{},
150+
}
151+
err := rm.k8sClient.Get(ctx,
152+
types.NamespacedName{
153+
Name: assets.GRPCSecretName,
154+
Namespace: req.Namespace,
155+
},
156+
&gRPCSecret)
157+
if client.IgnoreNotFound(err) != nil {
158+
return ctrl.Result{}, err
159+
}
160+
161+
rotate, err := assets.RotateGRPCSecret(&gRPCSecret, logger)
162+
if err != nil {
163+
return ctrl.Result{}, err
164+
}
165+
if rotate {
166+
err = rm.k8sClient.Update(ctx, &gRPCSecret)
167+
if errors.IsNotFound(err) {
168+
err = rm.k8sClient.Create(ctx, &gRPCSecret)
169+
}
170+
if err != nil {
171+
return ctrl.Result{}, err
172+
}
173+
}
174+
136175
ms, err := rm.getStack(ctx, req)
137176
if err != nil {
138177
// retry since some error has occured

pkg/controllers/monitoring/thanos-querier/components.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"k8s.io/utils/ptr"
1111

1212
msoapi "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
13+
"github.com/rhobs/observability-operator/pkg/assets"
1314
"github.com/rhobs/observability-operator/pkg/reconciler"
1415
)
1516

@@ -29,6 +30,11 @@ func newThanosQuerierDeployment(name string, spec *msoapi.ThanosQuerier, sidecar
2930
"--log.format=logfmt",
3031
"--query.replica-label=prometheus_replica",
3132
"--query.auto-downsampling",
33+
"--grpc-client-tls-secure",
34+
"--grpc-client-server-name=prometheus-grpc",
35+
"--grpc-client-tls-ca=/etc/thanos/tls-sidecar-assets/ca.crt",
36+
"--grpc-client-tls-key=/etc/thanos/tls-sidecar-assets/thanos-querier-client.key",
37+
"--grpc-client-tls-cert=/etc/thanos/tls-sidecar-assets/thanos-querier-client.crt",
3238
}
3339
for _, endpoint := range sidecarUrls {
3440
args = append(args, fmt.Sprintf("--endpoint=%s", endpoint))
@@ -86,6 +92,12 @@ func newThanosQuerierDeployment(name string, spec *msoapi.ThanosQuerier, sidecar
8692
Type: corev1.SeccompProfileTypeRuntimeDefault,
8793
},
8894
},
95+
VolumeMounts: []corev1.VolumeMount{
96+
{
97+
Name: "thanos-sidecar-tls-assets",
98+
MountPath: "/etc/thanos/tls-sidecar-assets",
99+
},
100+
},
89101
},
90102
},
91103
NodeSelector: map[string]string{
@@ -97,6 +109,16 @@ func newThanosQuerierDeployment(name string, spec *msoapi.ThanosQuerier, sidecar
97109
Type: corev1.SeccompProfileTypeRuntimeDefault,
98110
},
99111
},
112+
Volumes: []corev1.Volume{
113+
{
114+
Name: "thanos-sidecar-tls-assets",
115+
VolumeSource: corev1.VolumeSource{
116+
Secret: &corev1.SecretVolumeSource{
117+
SecretName: assets.GRPCSecretName,
118+
},
119+
},
120+
},
121+
},
100122
},
101123
},
102124
ProgressDeadlineSeconds: ptr.To(int32(300)),

0 commit comments

Comments
 (0)