Skip to content

Commit e22047a

Browse files
authored
feat(prometheus): add prometheus support (#134)
### Description

This PR adds support for monitoring with Prometheus by:

- adding a new top-level `observability` parameter section
- deploying a Prometheus server
- conditionally exposing metrics endpoints on all EL/CL clients, as well as OP services
- registering metrics jobs for each with Prometheus

The `prometheus` module from the `ethereum-package` package was used as inspiration, but modified to add helper methods and improve the job-registration workflow.

This PR has been tested and successfully deploys a Prometheus server with functioning metrics scrape jobs:

<img width="1786" alt="image" src="https://github.com/user-attachments/assets/397d5c5c-2465-40cd-a18e-9c581fb30ace" />
1 parent d2aadba commit e22047a

21 files changed

+784
-315
lines changed

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,24 @@ The full YAML schema that can be passed in is as follows with the defaults provi
7575

7676
```yaml
7777
optimism_package:
78+
# Observability configuration
79+
observability:
80+
# Whether or not to configure observability (e.g. prometheus)
81+
enabled: true
82+
# Default prometheus configuration
83+
prometheus_params:
84+
storage_tsdb_retention_time: "1d"
85+
storage_tsdb_retention_size: "512MB"
86+
# Resource management for prometheus container
87+
# CPU is in millicores
88+
# RAM is in MB
89+
min_cpu: 10
90+
max_cpu: 1000
91+
min_mem: 128
92+
max_mem: 2048
93+
# Prometheus docker image to use
94+
# Defaults to the latest image
95+
image: "prom/prometheus:latest"
7896
# Interop configuration
7997
interop:
8098
# Whether or not to enable interop mode

main.star

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ l2_launcher = import_module("./src/l2.star")
44
op_supervisor_launcher = import_module(
55
"./src/interop/op-supervisor/op_supervisor_launcher.star"
66
)
7+
8+
observability = import_module("./src/observability/observability.star")
9+
prometheus = import_module("./src/observability/prometheus/prometheus_launcher.star")
10+
711
wait_for_sync = import_module("./src/wait/wait_for_sync.star")
812
input_parser = import_module("./src/package_io/input_parser.star")
913
ethereum_package_static_files = import_module(
@@ -40,8 +44,11 @@ def run(plan, args):
4044
global_log_level = optimism_args_with_right_defaults.global_log_level
4145
persistent = optimism_args_with_right_defaults.persistent
4246

47+
observability_params = optimism_args_with_right_defaults.observability
4348
interop_params = optimism_args_with_right_defaults.interop
4449

50+
observability_helper = observability.make_helper(observability_params)
51+
4552
# Deploy the L1
4653
l1_network = ""
4754
if external_l1_args:
@@ -109,6 +116,7 @@ def run(plan, args):
109116
global_node_selectors,
110117
global_tolerations,
111118
persistent,
119+
observability_helper,
112120
interop_params,
113121
)
114122

@@ -120,6 +128,15 @@ def run(plan, args):
120128
all_participants,
121129
jwt_file,
122130
interop_params.supervisor_params,
131+
observability_helper,
132+
)
133+
134+
if observability_helper.enabled:
135+
plan.print("Launching prometheus...")
136+
prometheus_private_url = prometheus.launch_prometheus(
137+
plan,
138+
observability_helper,
139+
global_node_selectors,
123140
)
124141

125142

network_params.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ optimism_package:
22
chains:
33
- participants:
44
- el_type: op-geth
5-
el_image: ""
65
el_log_level: ""
76
el_extra_env_vars: {}
87
el_extra_labels: {}
@@ -14,7 +13,6 @@ optimism_package:
1413
el_min_mem: 0
1514
el_max_mem: 0
1615
cl_type: op-node
17-
cl_image: ""
1816
cl_log_level: ""
1917
cl_extra_env_vars: {}
2018
cl_extra_labels: {}
@@ -37,10 +35,8 @@ optimism_package:
3735
granite_time_offset: 0
3836
fund_dev_accounts: true
3937
batcher_params:
40-
image: ""
4138
extra_params: []
4239
mev_params:
43-
rollup_boost_image: ""
4440
builder_host: ""
4541
builder_port: ""
4642
additional_services: []

src/batcher/op-batcher/op_batcher_launcher.star

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ ethereum_package_constants = import_module(
66
"github.com/ethpandaops/ethereum-package/src/package_io/constants.star"
77
)
88

9+
observability = import_module("../../observability/observability.star")
10+
prometheus = import_module("../../observability/prometheus/prometheus_launcher.star")
11+
912
#
1013
# ---------------------------------- Batcher client -------------------------------------
1114
# The Docker container runs as the "op-batcher" user so we can't write to root
@@ -41,6 +44,7 @@ def launch(
4144
l1_config_env_vars,
4245
gs_batcher_private_key,
4346
batcher_params,
47+
observability_helper,
4448
):
4549
batcher_service_name = "{0}".format(service_name)
4650

@@ -53,6 +57,7 @@ def launch(
5357
l1_config_env_vars,
5458
gs_batcher_private_key,
5559
batcher_params,
60+
observability_helper,
5661
)
5762

5863
batcher_service = plan.add_service(service_name, config)
@@ -62,6 +67,8 @@ def launch(
6267
batcher_service.ip_address, batcher_http_port.number
6368
)
6469

70+
observability.register_op_service_metrics_job(observability_helper, batcher_service)
71+
6572
return "op_batcher"
6673

6774

@@ -74,7 +81,10 @@ def get_batcher_config(
7481
l1_config_env_vars,
7582
gs_batcher_private_key,
7683
batcher_params,
84+
observability_helper,
7785
):
86+
ports = dict(get_used_ports())
87+
7888
cmd = [
7989
"op-batcher",
8090
"--l2-eth-rpc=" + el_context.rpc_http_url,
@@ -93,9 +103,13 @@ def get_batcher_config(
93103
"--data-availability-type=blobs",
94104
]
95105

106+
# apply customizations
107+
108+
if observability_helper.enabled:
109+
observability.configure_op_service_metrics(cmd, ports)
110+
96111
cmd += batcher_params.extra_params
97112

98-
ports = get_used_ports()
99113
return ServiceConfig(
100114
image=image,
101115
ports=ports,

src/challenger/op-challenger/op_challenger_launcher.star

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ ethereum_package_constants = import_module(
66
"github.com/ethpandaops/ethereum-package/src/package_io/constants.star"
77
)
88

9+
observability = import_module("../../observability/observability.star")
10+
prometheus = import_module("../../observability/prometheus/prometheus_launcher.star")
11+
912
#
1013
# ---------------------------------- Challenger client -------------------------------------
1114
CHALLENGER_DATA_DIRPATH_ON_SERVICE_CONTAINER = "/data/op-challenger/op-challenger-data"
@@ -29,6 +32,7 @@ def launch(
2932
deployment_output,
3033
network_params,
3134
challenger_params,
35+
observability_helper,
3236
):
3337
challenger_service_name = "{0}".format(service_name)
3438

@@ -44,10 +48,15 @@ def launch(
4448
deployment_output,
4549
network_params,
4650
challenger_params,
51+
observability_helper,
4752
)
4853

4954
challenger_service = plan.add_service(service_name, config)
5055

56+
observability.register_op_service_metrics_job(
57+
observability_helper, challenger_service
58+
)
59+
5160
return "op_challenger"
5261

5362

@@ -63,15 +72,22 @@ def get_challenger_config(
6372
deployment_output,
6473
network_params,
6574
challenger_params,
75+
observability_helper,
6676
):
77+
ports = dict(get_used_ports())
78+
6779
cmd = [
6880
"op-challenger",
6981
"--cannon-l2-genesis="
70-
+ ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS
71-
+ "/genesis-{0}.json".format(network_params.network_id),
82+
+ "{0}/genesis-{1}.json".format(
83+
ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS,
84+
network_params.network_id,
85+
),
7286
"--cannon-rollup-config="
73-
+ ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS
74-
+ "/rollup-{0}.json".format(network_params.network_id),
87+
+ "{0}/rollup-{1}.json".format(
88+
ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS,
89+
network_params.network_id,
90+
),
7591
"--game-factory-address=" + game_factory_address,
7692
"--datadir=" + CHALLENGER_DATA_DIRPATH_ON_SERVICE_CONTAINER,
7793
"--l1-beacon=" + l1_config_env_vars["CL_RPC_URL"],
@@ -81,10 +97,18 @@ def get_challenger_config(
8197
"--rollup-rpc=" + cl_context.beacon_http_url,
8298
"--trace-type=" + "cannon,permissioned",
8399
]
100+
101+
# configure files
102+
84103
files = {
85104
ethereum_package_constants.GENESIS_DATA_MOUNTPOINT_ON_CLIENTS: deployment_output,
86105
}
87106

107+
# apply customizations
108+
109+
if observability_helper.enabled:
110+
observability.configure_op_service_metrics(cmd, ports)
111+
88112
if (
89113
challenger_params.cannon_prestate_path
90114
and challenger_params.cannon_prestates_url
@@ -107,7 +131,6 @@ def get_challenger_config(
107131
CHALLENGER_DATA_DIRPATH_ON_SERVICE_CONTAINER, " ".join(cmd)
108132
)
109133

110-
ports = get_used_ports()
111134
return ServiceConfig(
112135
image=image,
113136
ports=ports,

0 commit comments

Comments
 (0)