diff --git a/Tiltfile b/Tiltfile index f19f041446ae..60f033f92209 100644 --- a/Tiltfile +++ b/Tiltfile @@ -420,9 +420,9 @@ def deploy_provider_crds(): ) def deploy_observability(): - if "promtail" in settings.get("deploy_observability", []): - k8s_yaml(read_file("./.tiltbuild/yaml/promtail.observability.yaml"), allow_duplicates = True) - k8s_resource(workload = "promtail", extra_pod_selectors = [{"app": "promtail"}], labels = ["observability"], resource_deps = ["loki"], objects = ["promtail:serviceaccount"]) + if "alloy" in settings.get("deploy_observability", []): + k8s_yaml(read_file("./.tiltbuild/yaml/alloy.observability.yaml"), allow_duplicates = True) + k8s_resource(workload = "alloy", extra_pod_selectors = [{"app": "alloy"}], labels = ["observability"], resource_deps = ["loki"], objects = ["alloy:serviceaccount"]) if "loki" in settings.get("deploy_observability", []): k8s_yaml(read_file("./.tiltbuild/yaml/loki.observability.yaml"), allow_duplicates = True) diff --git a/docs/book/src/developer/core/logging.md b/docs/book/src/developer/core/logging.md index 2b6b0c02c476..b8fb97c4aa86 100644 --- a/docs/book/src/developer/core/logging.md +++ b/docs/book/src/developer/core/logging.md @@ -135,19 +135,19 @@ thorny parts of code. Over time, based on feedback from SRE/developers, more log ## Developing and testing logs -Our [Tilt](tilt.md) setup offers a batteries-included log suite based on [Promtail](https://grafana.com/docs/loki/latest/clients/promtail/), [Loki](https://grafana.com/docs/loki/latest/fundamentals/overview/) and [Grafana](https://grafana.com/docs/grafana/latest/explore/logs-integration/). +Our [Tilt](tilt.md) setup offers a batteries-included log suite based on [Alloy](https://grafana.com/docs/loki/latest/send-data/alloy/), [Loki](https://grafana.com/docs/loki/latest/fundamentals/overview/) and [Grafana](https://grafana.com/docs/grafana/latest/explore/logs-integration/).
We are working to continuously improving this experience, allowing Cluster API developers to use logs and improve them as part of their development process. For the best experience exploring the logs using Tilt: 1. Set `--logging-format=json`. 2. Set a high log verbosity, e.g. `v=5`. -3. Enable Promtail, Loki, and Grafana under `deploy_observability`. +3. Enable Alloy, Loki, and Grafana under `deploy_observability`. A minimal example of a tilt-settings.yaml file that deploys a ready-to-use logging suite looks like: ```yaml deploy_observability: - - promtail + - alloy - loki - grafana enable_providers: @@ -208,7 +208,6 @@ Will return logs from the `capi-controller-manager`, associated with the Cluster Will return the logs from four CAPI providers - the Core provider, Kubeadm Control Plane provider, Kubeadm Bootstrap provider and the Docker infrastructure provider. It filters by the cluster name and the machine name and then formats the log lines to show just the source controller and the message. This allows us to correlate logs and see actions taken by each of these four providers related to the machine `my-cluster-linux-worker-1`. For more information on formatting and filtering logs using Grafana and Loki see: -- [json parsing](https://grafana.com/docs/loki/latest/clients/promtail/stages/json/) - [log queries](https://grafana.com/docs/loki/latest/logql/log_queries/) ## What about providers diff --git a/docs/book/src/developer/core/testing.md b/docs/book/src/developer/core/testing.md index b0dff071a918..82d5fe8d23c2 100644 --- a/docs/book/src/developer/core/testing.md +++ b/docs/book/src/developer/core/testing.md @@ -308,7 +308,7 @@ analyzing them via Grafana. 1. Start the development environment as described in [Developing Cluster API with Tilt](tilt.md). * Make sure to deploy Loki and Grafana via `deploy_observability`. - * If you only want to see imported logs, don't deploy promtail (via `deploy_observability`).
+ * If you only want to see imported logs, don't deploy alloy (via `deploy_observability`). * If you want to drop all logs from Loki, just delete the Loki Pod in the `observability` namespace. 2. You can then import logs via the `Import Logs` button on the top right of the [Loki resource page](http://localhost:10350/r/loki/overview). Just click on the downwards arrow, enter either a ProwJob URL, a GCS path or a local folder and click on `Import Logs`. diff --git a/docs/book/src/developer/core/tilt.md b/docs/book/src/developer/core/tilt.md index ebe0adf7a22b..5c6cdb66d183 100644 --- a/docs/book/src/developer/core/tilt.md +++ b/docs/book/src/developer/core/tilt.md @@ -297,7 +297,7 @@ Supported values are: * `loki`: To receive and store logs. * `metrics-server`: To enable `kubectl top node/pod`. * `prometheus`*: For collecting metrics from Kubernetes. -* `promtail`: For providing pod logs to `loki`. +* `alloy`: For providing pod logs to `loki`. * `parca`*: For visualizing profiling data. * `tempo`: To store traces. * `visualizer`*: Visualize Cluster API resources for each cluster, provide quick access to the specs and status of any resource. diff --git a/docs/book/src/developer/core/tuning.md b/docs/book/src/developer/core/tuning.md index 19fb5fd018ff..964fb32e4b7f 100644 --- a/docs/book/src/developer/core/tuning.md +++ b/docs/book/src/developer/core/tuning.md @@ -9,7 +9,7 @@ When tuning controllers, both for scalability, performance or for reducing their Cluster API provides a full stack of tools for tuning its own controllers as well as controllers for all providers if developed using controller runtime. As a bonus, most of this tooling can be used with any other controller runtime based controllers. -With tilt, you can easily deploy a full observability stack with Grafana, Loki, promtail, Prometheus, kube-state-metrics, Parca and Tempo. 
+With tilt, you can easily deploy a full observability stack with Grafana, Loki, Alloy, Prometheus, kube-state-metrics, Parca and Tempo. All tools are preconfigured, and most notably kube-state-metrics already collects CAPI metrics and Grafana is configured with a set of dashboards that we used in previous rounds of CAPI tuning. Overall, the CAPI dev environment offers a considerable amount of expertise, free to use and to improve for the entire community. We highly recommend to invest time in looking into those tools, learn and provide feedback. diff --git a/hack/observability/alloy/config.alloy b/hack/observability/alloy/config.alloy new file mode 100644 index 000000000000..88e8889ec55e --- /dev/null +++ b/hack/observability/alloy/config.alloy @@ -0,0 +1,120 @@ +discovery.kubernetes "kubernetes_pods" { + role = "pod" +} + +discovery.relabel "kubernetes_pods" { + targets = discovery.kubernetes.kubernetes_pods.targets + + rule { + source_labels = ["__meta_kubernetes_pod_controller_name"] + regex = "([0-9a-z-.]+?)(-[0-9a-f]{8,10})?"
+ target_label = "__tmp_controller_name" + } + + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app", "__tmp_controller_name", "__meta_kubernetes_pod_name"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "app" + } + + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_instance", "__meta_kubernetes_pod_label_instance"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "instance" + } + + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_component", "__meta_kubernetes_pod_label_component"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "component" + } + + rule { + source_labels = ["__meta_kubernetes_pod_node_name"] + target_label = "node_name" + } + + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + rule { + source_labels = ["namespace", "app"] + separator = "/" + target_label = "job" + } + + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + rule { + source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] + separator = "/" + target_label = "__path__" + replacement = "/var/log/pods/*$1/*.log" + } + + rule { + source_labels = ["__meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash", "__meta_kubernetes_pod_annotation_kubernetes_io_config_hash", "__meta_kubernetes_pod_container_name"] + separator = "/" + regex = "true/(.*)" + target_label = "__path__" + replacement = "/var/log/pods/*$1/*.log" + } +} + +local.file_match "kubernetes_pods" { + path_targets = discovery.relabel.kubernetes_pods.output +} + +loki.process "kubernetes_pods" { + forward_to = [loki.write.default.receiver] + + stage.cri { } + + stage.json { + expressions = { + Cluster = "join('/',[Cluster.namespace,Cluster.name])", + KubeadmControlPlane = 
"join('/',[KubeadmControlPlane.namespace,KubeadmControlPlane.name])", + Machine = "join('/',[Machine.namespace,Machine.name])", + MachineDeployment = "join('/',[MachineDeployment.namespace,MachineDeployment.name])", + MachinePool = "join('/',[MachinePool.namespace,MachinePool.name])", + MachineSet = "join('/',[MachineSet.namespace,MachineSet.name])", + controller = "", + } + } + + stage.labels { + values = { + Cluster = null, + KubeadmControlPlane = null, + Machine = null, + MachineDeployment = null, + MachinePool = null, + MachineSet = null, + controller = null, + } + } +} + +loki.source.file "kubernetes_pods" { + targets = local.file_match.kubernetes_pods.targets + forward_to = [loki.process.kubernetes_pods.receiver] + legacy_positions_file = "/run/promtail/positions.yaml" +} + +loki.write "default" { + endpoint { + url = "http://loki:3100/loki/api/v1/push" + tenant_id = "1" + } + external_labels = {} +} \ No newline at end of file diff --git a/hack/observability/promtail/kustomization.yaml b/hack/observability/alloy/kustomization.yaml similarity index 87% rename from hack/observability/promtail/kustomization.yaml rename to hack/observability/alloy/kustomization.yaml index 07821a5000e9..3ac807ab325c 100644 --- a/hack/observability/promtail/kustomization.yaml +++ b/hack/observability/alloy/kustomization.yaml @@ -2,12 +2,14 @@ resources: - ../namespace.yaml helmCharts: - - name: promtail + - name: alloy repo: https://grafana.github.io/helm-charts - releaseName: promtail - namespace: observability + releaseName: alloy + version: 0.12.5 valuesFile: values.yaml - version: 6.16.6 + namespace: observability + + helmGlobals: # Store chart in ".charts" folder instead of "charts". 
diff --git a/hack/observability/alloy/values.yaml b/hack/observability/alloy/values.yaml new file mode 100644 index 000000000000..111de8bda884 --- /dev/null +++ b/hack/observability/alloy/values.yaml @@ -0,0 +1,134 @@ +# Configuration for alloy - https://github.com/grafana/alloy/blob/main/operations/helm/charts/alloy/values.yaml +alloy: + configMap: + create: true + content: |- + discovery.kubernetes "kubernetes_pods" { + role = "pod" + } + + discovery.relabel "kubernetes_pods" { + targets = discovery.kubernetes.kubernetes_pods.targets + + rule { + source_labels = ["__meta_kubernetes_pod_controller_name"] + regex = "([0-9a-z-.]+?)(-[0-9a-f]{8,10})?" + target_label = "__tmp_controller_name" + } + + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_name", "__meta_kubernetes_pod_label_app", "__tmp_controller_name", "__meta_kubernetes_pod_name"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "app" + } + + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_instance", "__meta_kubernetes_pod_label_instance"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "instance" + } + + rule { + source_labels = ["__meta_kubernetes_pod_label_app_kubernetes_io_component", "__meta_kubernetes_pod_label_component"] + regex = "^;*([^;]+)(;.*)?$" + target_label = "component" + } + + rule { + source_labels = ["__meta_kubernetes_pod_node_name"] + target_label = "node_name" + } + + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + rule { + source_labels = ["namespace", "app"] + separator = "/" + target_label = "job" + } + + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + rule { + source_labels = ["__meta_kubernetes_pod_uid", "__meta_kubernetes_pod_container_name"] + separator = "/" + target_label = "__path__" + replacement = "/var/log/pods/*$1/*.log" + } + + rule { + 
source_labels = ["__meta_kubernetes_pod_annotationpresent_kubernetes_io_config_hash", "__meta_kubernetes_pod_annotation_kubernetes_io_config_hash", "__meta_kubernetes_pod_container_name"] + separator = "/" + regex = "true/(.*)" + target_label = "__path__" + replacement = "/var/log/pods/*$1/*.log" + } + } + + local.file_match "kubernetes_pods" { + path_targets = discovery.relabel.kubernetes_pods.output + } + + loki.process "kubernetes_pods" { + forward_to = [loki.write.default.receiver] + + stage.cri { } + + stage.json { + expressions = { + Cluster = "join('/',[Cluster.namespace,Cluster.name])", + KubeadmControlPlane = "join('/',[KubeadmControlPlane.namespace,KubeadmControlPlane.name])", + Machine = "join('/',[Machine.namespace,Machine.name])", + MachineDeployment = "join('/',[MachineDeployment.namespace,MachineDeployment.name])", + MachinePool = "join('/',[MachinePool.namespace,MachinePool.name])", + MachineSet = "join('/',[MachineSet.namespace,MachineSet.name])", + controller = "", + } + } + + stage.labels { + values = { + Cluster = null, + KubeadmControlPlane = null, + Machine = null, + MachineDeployment = null, + MachinePool = null, + MachineSet = null, + controller = null, + } + } + } + + loki.source.file "kubernetes_pods" { + targets = local.file_match.kubernetes_pods.targets + forward_to = [loki.process.kubernetes_pods.receiver] + legacy_positions_file = "/run/promtail/positions.yaml" + } + + loki.write "default" { + endpoint { + url = "http://loki:3100/loki/api/v1/push" + tenant_id = "1" + } + external_labels = {} + } + mounts: + varlog: true + run: + enabled: true + path: /run + podSecurityContext: + runAsUser: 0 + runAsGroup: 0 + fsGroup: 0 diff --git a/hack/observability/grafana/chart/values.yaml b/hack/observability/grafana/chart/values.yaml index a9411253874c..4bde2a178383 100644 --- a/hack/observability/grafana/chart/values.yaml +++ b/hack/observability/grafana/chart/values.yaml @@ -30,7 +30,7 @@ datasources: isDefault: true editable: true # This 
header has to be set as we had to set an OrgID - # in promtail to be able to push the logs to Loki. + # in alloy to be able to push the logs to Loki. jsonData: maxLines: 1000 derivedFields: diff --git a/hack/observability/promtail/values.yaml b/hack/observability/promtail/values.yaml deleted file mode 100644 index ed419ef9c045..000000000000 --- a/hack/observability/promtail/values.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# Configuration for promtail chart, see https://github.com/grafana/helm-charts/tree/main/charts/promtail - -config: - # publish data to loki - clients: - - url: http://loki:3100/loki/api/v1/push - # We have to set the tenant_id/OrgID to push logs to Loki - # otherwise we get an auth error. - tenant_id: 1 - - snippets: - pipelineStages: - # Parse cluster and machine to make them available as labels. - - cri: {} - - json: - expressions: - controller: - Cluster: join('/',[Cluster.namespace,Cluster.name]) - Machine: join('/',[Machine.namespace,Machine.name]) - KubeadmControlPlane: join('/',[KubeadmControlPlane.namespace,KubeadmControlPlane.name]) - MachineDeployment: join('/',[MachineDeployment.namespace,MachineDeployment.name]) - MachineSet: join('/',[MachineSet.namespace,MachineSet.name]) - MachinePool: join('/',[MachinePool.namespace,MachinePool.name]) - - labels: - controller: - Cluster: - Machine: - KubeadmControlPlane: - MachineDeployment: - MachineSet: - MachinePool: diff --git a/test/framework/deployment_helpers.go b/test/framework/deployment_helpers.go index 270bcbe7c3d1..8276aa67e8ef 100644 --- a/test/framework/deployment_helpers.go +++ b/test/framework/deployment_helpers.go @@ -329,7 +329,7 @@ func (eh *watchPodLogsEventHandler) streamPodLogs(pod *corev1.Pod) { } // logMetadata contains metadata about the logs. -// The format is very similar to the one used by promtail. +// The format is very similar to the one used by alloy. type logMetadata struct { Job string `json:"job"` Namespace string `json:"namespace"`