Skip to content

Commit 64248b4

Browse files
Your Name authored and vmcj committed
Collect syslog lines next to metrics
This lets us show the most recent log entries next to a metric, which helps diagnose issues such as why a CPU clocks down or why a process stops.
1 parent acf826a commit 64248b4

File tree

10 files changed

+172
-96
lines changed

10 files changed

+172
-96
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
/ssl.*
22
/grafana.deb
3+
/loki-linux-amd64.zip

provision-contest/ansible/roles/grafana/files/loki-local-config.yaml

Lines changed: 13 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,45 +2,26 @@ auth_enabled: false
22

33
server:
44
http_listen_port: 3100
5-
grpc_listen_port: 9096
5+
grpc_listen_port: 13100
66

7-
ingester:
8-
lifecycler:
9-
address: 127.0.0.1
10-
ring:
11-
kvstore:
12-
store: inmemory
13-
replication_factor: 1
14-
final_sleep: 0s
15-
chunk_idle_period: 5m
16-
chunk_retain_period: 30s
17-
max_transfer_retries: 0
7+
common:
8+
ring:
9+
instance_addr: 127.0.0.1
10+
kvstore:
11+
store: inmemory
12+
replication_factor: 1
13+
path_prefix: /tmp/loki
1814

1915
schema_config:
2016
configs:
21-
- from: 2018-04-15
22-
store: boltdb
17+
- from: 2020-05-15
18+
store: tsdb
2319
object_store: filesystem
24-
schema: v11
20+
schema: v13
2521
index:
2622
prefix: index_
27-
period: 168h
23+
period: 24h
2824

2925
storage_config:
30-
boltdb:
31-
directory: /data/loki/index
32-
3326
filesystem:
34-
directory: /data/loki/chunks
35-
36-
limits_config:
37-
enforce_metric_name: false
38-
reject_old_samples: true
39-
reject_old_samples_max_age: 168h
40-
41-
chunk_store_config:
42-
max_look_back_period: 0s
43-
44-
table_manager:
45-
retention_deletes_enabled: false
46-
retention_period: 0s
27+
directory: /tmp/loki/chunks

provision-contest/ansible/roles/grafana/tasks/main.yml

Lines changed: 51 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -19,48 +19,56 @@
1919
notify: Restart prometheus
2020

2121
# Setup loki which gathers our logs
22-
- name: Install loki
23-
unarchive:
24-
src: https://github.com/grafana/loki/releases/download/v2.5.0/loki-linux-amd64.zip
25-
dest: /usr/bin/
26-
remote_src: true
27-
owner: domjudge
28-
group: domjudge
29-
when: loki
30-
31-
- name: Dir for loki settings
32-
file:
33-
state: directory
34-
path: /etc/grafana/loki/
35-
owner: root
36-
group: root
37-
mode: 0755
38-
when: loki
39-
40-
- name: Set loki settings
41-
copy:
42-
src: loki-local-config.yaml
43-
dest: /etc/grafana/loki/
44-
owner: root
45-
group: root
46-
mode: 0644
47-
when: loki
48-
notify: Restart loki
49-
50-
- name: Setup loki systemd
51-
copy:
52-
src: loki.service
53-
dest: /etc/systemd/system/
54-
mode: 0655
55-
when: loki
56-
notify: Restart loki
57-
58-
- name: Start loki service
59-
service:
60-
name: loki
61-
state: started
62-
enabled: true
22+
- name: Setup loki
6323
when: loki
24+
block:
25+
- name: Install loki
26+
unarchive:
27+
src: loki-linux-amd64.zip
28+
dest: /usr/bin/
29+
remote_src: false
30+
owner: domjudge
31+
group: domjudge
32+
when: ICPC_IMAGE
33+
34+
- name: Install loki
35+
unarchive:
36+
src: https://github.com/grafana/loki/releases/download/v2.5.0/loki-linux-amd64.zip
37+
dest: /usr/bin/
38+
remote_src: true
39+
owner: domjudge
40+
group: domjudge
41+
when: not ICPC_IMAGE
42+
43+
- name: Dir for loki settings
44+
file:
45+
state: directory
46+
path: /etc/grafana/loki/
47+
owner: root
48+
group: root
49+
mode: 0755
50+
51+
- name: Set loki settings
52+
copy:
53+
src: loki-local-config.yaml
54+
dest: /etc/grafana/loki/
55+
owner: root
56+
group: root
57+
mode: 0644
58+
notify: Restart loki
59+
60+
- name: Setup loki systemd
61+
copy:
62+
src: loki.service
63+
dest: /etc/systemd/system/
64+
mode: 0655
65+
notify: Restart loki
66+
67+
- name: Start loki service
68+
service:
69+
name: loki
70+
state: started
71+
enabled: true
6472

6573
## Setup grafana
6674
- name: Install grafana
@@ -91,8 +99,8 @@
9199
notify: Restart grafana
92100

93101
- name: Set up grafana datasources
94-
synchronize:
95-
src: files/grafana/datasources.yml
102+
template:
103+
src: datasources.yml.j2
96104
dest: /etc/grafana/provisioning/datasources/default.yml
97105
notify: Restart grafana
98106

provision-contest/ansible/roles/grafana/files/datasources.yml renamed to provision-contest/ansible/roles/grafana/templates/datasources.yml.j2

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,14 @@ datasources:
4848
version: 1
4949
# <bool> allow users to edit datasources from the UI.
5050
editable: true
51+
{% if loki is defined and loki %}
52+
- name: Loki
53+
type: loki
54+
access: proxy
55+
url: http://localhost:3100
56+
jsonData:
57+
timeout: 60
58+
maxLines: 1000
59+
httpHeaderName1: Connection
60+
httpHeaderName2: Upgrade
61+
{% endif %}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
11
prom: true
2+
GROUP_PREFIXES:
3+
- 'online-'
4+
- ''
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
*.key
22
*.crt
3+
promtail-linux-amd64.zip

provision-contest/ansible/roles/prometheus_target_all/files/promtail.service

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ After=network.target
44

55
[Service]
66
Type=simple
7-
ExecStart=/usr/bin/promtail-linux-amd64 --config.file /etc/promtail/promtail-local-config.yaml
7+
ExecStart=/usr/bin/promtail-linux-amd64 --config.file /etc/promtail/promtail-local-config.yml
88

99
[Install]
1010
WantedBy=multi-user.target

provision-contest/ansible/roles/prometheus_target_all/tasks/main.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,57 @@
5555
regexp: '^ARGS=""'
5656
line: 'ARGS="--web.config /etc/prometheus/prometheus-authentication.yml"'
5757
notify: Restart node-exporter
58+
59+
# Setup promtail which sends our logs
60+
- name: Setup promtail to ship logs to loki (and grafana)
61+
when: prom
62+
block:
63+
- name: Install promtail
64+
unarchive:
65+
src: promtail-linux-amd64.zip
66+
dest: /usr/bin/
67+
remote_src: false
68+
owner: domjudge
69+
group: domjudge
70+
when: ICPC_IMAGE
71+
72+
- name: Install promtail
73+
unarchive:
74+
src: https://github.com/grafana/loki/releases/download/v3.0.1/promtail-linux-amd64.zip
75+
dest: /usr/bin/
76+
remote_src: true
77+
owner: domjudge
78+
group: domjudge
79+
when: not ICPC_IMAGE
80+
81+
- name: Dir for promtail settings
82+
file:
83+
state: directory
84+
path: /etc/promtail
85+
owner: root
86+
group: root
87+
mode: 0755
88+
89+
- name: Set promtail settings
90+
template:
91+
src: promtail-local-config.yml.j2
92+
dest: /etc/promtail/promtail-local-config.yml
93+
owner: root
94+
group: root
95+
mode: 0644
96+
notify: Restart promtail
97+
98+
- name: Setup promtail systemd
99+
copy:
100+
src: promtail.service
101+
dest: /etc/systemd/system/
102+
mode: 0655
103+
owner: root
104+
group: root
105+
notify: Restart promtail
106+
107+
- name: Start promtail service
108+
service:
109+
name: promtail
110+
state: started
111+
enabled: true

provision-contest/ansible/roles/prometheus_target_all/templates/promtail-local-config.yaml.j2

Lines changed: 0 additions & 20 deletions
This file was deleted.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
server:
2+
http_listen_port: 9080
3+
grpc_listen_port: 19080
4+
5+
positions:
6+
filename: /var/tmp/promtail-syslog-positions.yml
7+
8+
clients:
9+
{% for host in groups["grafana"] %}
10+
- url: http://{{ hostvars[host].ansible_host }}:3100/loki/api/v1/push
11+
{% endfor %}
12+
13+
scrape_configs:
14+
- job_name: system
15+
static_configs:
16+
- labels:
17+
__path__: /var/log/**/*log
18+
{% for group_prefix in GROUP_PREFIXES %}
19+
{% if ansible_fqdn in groups[group_prefix+'judgehost'] %}
20+
- job_name: judgehostlogs
21+
static_configs:
22+
- labels:
23+
__path__: '/opt/domjudge/output/log/*'
24+
{% endif %}
25+
{% if ansible_fqdn in groups[group_prefix+'domserver'] %}
26+
- job_name: webapplogs
27+
static_configs:
28+
- labels:
29+
__path__: '/opt/domjudge/webapp/var/log/*'
30+
{% endif %}
31+
{% endfor %}
32+
# See: https://alexandre.deverteuil.net/post/syslog-relay-for-loki/
33+
- job_name: syslog
34+
syslog:
35+
listen_address: 0.0.0.0:1514
36+
labels:
37+
job: syslog

0 commit comments

Comments
 (0)