Skip to content

Commit 98cbc5a

Browse files
committed
feat: add EMA for smoother metrics calculations
1 parent ae2ed78 commit 98cbc5a

5 files changed

Lines changed: 357 additions & 23 deletions

File tree

.github/workflows/go-test.yml

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
name: Go Tests
2+
3+
on:
4+
push:
5+
branches: [ main, master ]
6+
paths:
7+
- '**.go'
8+
- 'go.mod'
9+
- 'go.sum'
10+
pull_request:
11+
branches: [ main, master ]
12+
paths:
13+
- '**.go'
14+
- 'go.mod'
15+
- 'go.sum'
16+
17+
jobs:
18+
test:
19+
name: Run Tests
20+
runs-on: ubuntu-latest
21+
steps:
22+
- name: Checkout code
23+
uses: actions/checkout@v4
24+
25+
- name: Set up Go
26+
uses: actions/setup-go@v5
27+
with:
28+
go-version: '1.21'
29+
check-latest: true
30+
31+
- name: Get dependencies
32+
run: go mod tidy
33+
34+
- name: Run tests
35+
run: go test -v -coverprofile=coverage.out ./...
36+
37+
- name: Upload coverage report
38+
uses: actions/upload-artifact@v4
39+
with:
40+
name: coverage-report
41+
path: coverage.out
42+
if-no-files-found: error
43+
44+
coverage:
45+
name: Coverage Analysis
46+
needs: test
47+
runs-on: ubuntu-latest
48+
steps:
49+
- name: Checkout code
50+
uses: actions/checkout@v4
51+
52+
- name: Set up Go
53+
uses: actions/setup-go@v5
54+
with:
55+
go-version: '1.21'
56+
check-latest: true
57+
58+
- name: Download coverage report
59+
uses: actions/download-artifact@v4
60+
with:
61+
name: coverage-report
62+
63+
- name: Display coverage
64+
run: |
65+
go tool cover -func=coverage.out

coverage.out

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
mode: set
2+
github.com/appwrite/monitoring/logger.go:24.20,28.2 1 1
3+
github.com/appwrite/monitoring/logger.go:30.82,34.2 3 0
4+
github.com/appwrite/monitoring/logger.go:36.58,39.2 2 0
5+
github.com/appwrite/monitoring/logger.go:41.62,44.2 2 0
6+
github.com/appwrite/monitoring/logger.go:46.59,49.2 2 0
7+
github.com/appwrite/monitoring/logger.go:51.60,54.2 2 0
8+
github.com/appwrite/monitoring/logger.go:56.59,59.2 2 0
9+
github.com/appwrite/monitoring/logger.go:61.60,64.2 2 0
10+
github.com/appwrite/monitoring/logger.go:66.60,69.2 2 0
11+
github.com/appwrite/monitoring/main.go:45.126,47.16 2 1
12+
github.com/appwrite/monitoring/main.go:54.2,69.8 3 1
13+
github.com/appwrite/monitoring/main.go:47.16,49.3 1 0
14+
github.com/appwrite/monitoring/main.go:72.81,74.2 1 1
15+
github.com/appwrite/monitoring/main.go:76.42,78.18 2 0
16+
github.com/appwrite/monitoring/main.go:81.2,81.19 1 0
17+
github.com/appwrite/monitoring/main.go:85.2,86.16 2 0
18+
github.com/appwrite/monitoring/main.go:90.2,90.26 1 0
19+
github.com/appwrite/monitoring/main.go:95.2,99.22 4 0
20+
github.com/appwrite/monitoring/main.go:105.2,115.29 2 0
21+
github.com/appwrite/monitoring/main.go:78.18,80.3 1 0
22+
github.com/appwrite/monitoring/main.go:81.19,83.3 1 0
23+
github.com/appwrite/monitoring/main.go:86.16,88.3 1 0
24+
github.com/appwrite/monitoring/main.go:90.26,92.3 1 0
25+
github.com/appwrite/monitoring/main.go:99.22,101.3 1 0
26+
github.com/appwrite/monitoring/main.go:101.8,103.3 1 0
27+
github.com/appwrite/monitoring/main.go:118.45,120.16 2 0
28+
github.com/appwrite/monitoring/main.go:124.2,128.22 4 0
29+
github.com/appwrite/monitoring/main.go:139.2,149.29 2 0
30+
github.com/appwrite/monitoring/main.go:120.16,122.3 1 0
31+
github.com/appwrite/monitoring/main.go:128.22,130.3 1 0
32+
github.com/appwrite/monitoring/main.go:130.8,137.3 1 0
33+
github.com/appwrite/monitoring/main.go:152.43,155.16 2 0
34+
github.com/appwrite/monitoring/main.go:159.2,163.22 4 0
35+
github.com/appwrite/monitoring/main.go:174.2,182.17 1 0
36+
github.com/appwrite/monitoring/main.go:187.2,188.16 2 0
37+
github.com/appwrite/monitoring/main.go:192.2,192.31 1 0
38+
github.com/appwrite/monitoring/main.go:228.2,228.12 1 0
39+
github.com/appwrite/monitoring/main.go:155.16,157.3 1 0
40+
github.com/appwrite/monitoring/main.go:163.22,165.3 1 0
41+
github.com/appwrite/monitoring/main.go:165.8,172.3 1 0
42+
github.com/appwrite/monitoring/main.go:182.17,184.3 1 0
43+
github.com/appwrite/monitoring/main.go:188.16,190.3 1 0
44+
github.com/appwrite/monitoring/main.go:192.31,194.17 2 0
45+
github.com/appwrite/monitoring/main.go:199.3,203.23 3 0
46+
github.com/appwrite/monitoring/main.go:215.3,223.18 1 0
47+
github.com/appwrite/monitoring/main.go:194.17,196.12 2 0
48+
github.com/appwrite/monitoring/main.go:203.23,205.4 1 0
49+
github.com/appwrite/monitoring/main.go:205.9,213.4 1 0
50+
github.com/appwrite/monitoring/main.go:223.18,225.4 1 0
51+
github.com/appwrite/monitoring/main.go:231.64,232.19 1 0
52+
github.com/appwrite/monitoring/main.go:235.2,235.15 1 0
53+
github.com/appwrite/monitoring/main.go:232.19,234.3 1 0
54+
github.com/appwrite/monitoring/main.go:238.57,240.16 2 0
55+
github.com/appwrite/monitoring/main.go:244.2,245.16 2 0
56+
github.com/appwrite/monitoring/main.go:249.2,254.16 5 0
57+
github.com/appwrite/monitoring/main.go:257.2,260.28 3 0
58+
github.com/appwrite/monitoring/main.go:264.2,264.12 1 0
59+
github.com/appwrite/monitoring/main.go:240.16,242.3 1 0
60+
github.com/appwrite/monitoring/main.go:245.16,247.3 1 0
61+
github.com/appwrite/monitoring/main.go:254.16,256.3 1 0
62+
github.com/appwrite/monitoring/main.go:260.28,262.3 1 0
63+
github.com/appwrite/monitoring/main.go:267.33,275.21 4 0
64+
github.com/appwrite/monitoring/main.go:275.21,277.3 1 0
65+
github.com/appwrite/monitoring/main.go:280.37,281.37 1 0
66+
github.com/appwrite/monitoring/main.go:285.2,285.40 1 0
67+
github.com/appwrite/monitoring/main.go:289.2,289.38 1 0
68+
github.com/appwrite/monitoring/main.go:281.37,283.3 1 0
69+
github.com/appwrite/monitoring/main.go:285.40,287.3 1 0
70+
github.com/appwrite/monitoring/main.go:289.38,291.3 1 0
71+
github.com/appwrite/monitoring/main.go:294.13,305.22 7 0
72+
github.com/appwrite/monitoring/main.go:310.2,313.27 2 0
73+
github.com/appwrite/monitoring/main.go:319.2,319.20 1 0
74+
github.com/appwrite/monitoring/main.go:322.2,322.38 1 0
75+
github.com/appwrite/monitoring/main.go:325.2,325.44 1 0
76+
github.com/appwrite/monitoring/main.go:328.2,328.40 1 0
77+
github.com/appwrite/monitoring/main.go:332.2,333.16 2 0
78+
github.com/appwrite/monitoring/main.go:337.2,343.17 6 0
79+
github.com/appwrite/monitoring/main.go:305.22,308.3 2 0
80+
github.com/appwrite/monitoring/main.go:313.27,316.3 2 0
81+
github.com/appwrite/monitoring/main.go:319.20,321.3 1 0
82+
github.com/appwrite/monitoring/main.go:322.38,324.3 1 0
83+
github.com/appwrite/monitoring/main.go:325.44,327.3 1 0
84+
github.com/appwrite/monitoring/main.go:328.40,330.3 1 0
85+
github.com/appwrite/monitoring/main.go:333.16,335.3 1 0

main.go

Lines changed: 52 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ type SystemMonitor struct {
3434
diskLimit float64
3535
interval int
3636
log *Logger
37+
38+
// EMA tracking
39+
cpuEMA float64
40+
memoryEMA float64
41+
diskEMA float64
42+
alpha float64 // EMA smoothing factor
3743
}
3844

3945
func NewSystemMonitor(betterStackURL string, interval int, cpuLimit, memoryLimit, diskLimit float64) (*SystemMonitor, error) {
@@ -42,6 +48,12 @@ func NewSystemMonitor(betterStackURL string, interval int, cpuLimit, memoryLimit
4248
return nil, fmt.Errorf("failed to get hostname: %v", err)
4349
}
4450

51+
// Calculate alpha based on interval to get roughly 5 minutes of smoothing
52+
// EMA formula: alpha = 2/(N+1) where N is the number of periods
53+
// For 5 minutes of smoothing with our interval: N = 300/interval
54+
N := float64(300) / float64(interval)
55+
alpha := 2.0 / (N + 1.0)
56+
4557
return &SystemMonitor{
4658
httpClient: &http.Client{
4759
Timeout: 5 * time.Second,
@@ -53,9 +65,14 @@ func NewSystemMonitor(betterStackURL string, interval int, cpuLimit, memoryLimit
5365
diskLimit: diskLimit,
5466
interval: interval,
5567
log: New(),
68+
alpha: alpha,
5669
}, nil
5770
}
5871

72+
func (s *SystemMonitor) calculateEMA(currentValue, previousEMA float64) float64 {
73+
return s.alpha*currentValue + (1-s.alpha)*previousEMA
74+
}
75+
5976
func (s *SystemMonitor) checkCPU() error {
6077
duration := float64(s.interval) / 10
6178
if duration < 5 {
@@ -74,12 +91,15 @@ func (s *SystemMonitor) checkCPU() error {
7491
return nil
7592
}
7693

77-
value := cpuPercent[0]
78-
status := s.getStatus(value, s.cpuLimit)
94+
// Calculate EMA for CPU usage
95+
instantValue := cpuPercent[0]
96+
s.cpuEMA = s.calculateEMA(instantValue, s.cpuEMA)
97+
98+
status := s.getStatus(s.cpuEMA, s.cpuLimit)
7999
if status == "fail" {
80-
s.log.Warn("CPU usage %.2f%% exceeds limit of %.2f%%", value, s.cpuLimit)
100+
s.log.Warn("CPU usage EMA %.2f%% exceeds limit of %.2f%% (instant: %.2f%%)", s.cpuEMA, s.cpuLimit, instantValue)
81101
} else {
82-
s.log.Log("CPU usage: %.2f%% (limit: %.2f%%)", value, s.cpuLimit)
102+
s.log.Log("CPU usage EMA: %.2f%% (limit: %.2f%%, instant: %.2f%%)", s.cpuEMA, s.cpuLimit, instantValue)
83103
}
84104

85105
metric := Metric{
@@ -88,7 +108,7 @@ func (s *SystemMonitor) checkCPU() error {
88108
AlertID: fmt.Sprintf("cpu-%s", s.hostname),
89109
Timestamp: time.Now().Unix(),
90110
Status: status,
91-
Value: value,
111+
Value: s.cpuEMA,
92112
Limit: s.cpuLimit,
93113
}
94114

@@ -101,14 +121,17 @@ func (s *SystemMonitor) checkMemory() error {
101121
return fmt.Errorf("failed to get memory stats: %v", err)
102122
}
103123

104-
value := vmStat.UsedPercent
105-
status := s.getStatus(value, s.memoryLimit)
124+
instantValue := vmStat.UsedPercent
125+
s.memoryEMA = s.calculateEMA(instantValue, s.memoryEMA)
126+
127+
status := s.getStatus(s.memoryEMA, s.memoryLimit)
106128
if status == "fail" {
107-
s.log.Warn("Memory usage %.2f%% exceeds limit of %.2f%%", value, s.memoryLimit)
129+
s.log.Warn("Memory usage EMA %.2f%% exceeds limit of %.2f%% (instant: %.2f%%)", s.memoryEMA, s.memoryLimit, instantValue)
108130
} else {
109-
s.log.Log("Memory usage: %.2f%% (limit: %.2f%%), Available: %d MB, Total: %d MB",
110-
value,
131+
s.log.Log("Memory usage EMA: %.2f%% (limit: %.2f%%, instant: %.2f%%), Available: %d MB, Total: %d MB",
132+
s.memoryEMA,
111133
s.memoryLimit,
134+
instantValue,
112135
vmStat.Available/(1024*1024),
113136
vmStat.Total/(1024*1024))
114137
}
@@ -119,7 +142,7 @@ func (s *SystemMonitor) checkMemory() error {
119142
AlertID: fmt.Sprintf("memory-%s", s.hostname),
120143
Timestamp: time.Now().Unix(),
121144
Status: status,
122-
Value: value,
145+
Value: s.memoryEMA,
123146
Limit: s.memoryLimit,
124147
}
125148

@@ -133,14 +156,17 @@ func (s *SystemMonitor) checkDisk() error {
133156
return fmt.Errorf("failed to get disk usage: %v", err)
134157
}
135158

136-
value := usage.UsedPercent
137-
status := s.getStatus(value, s.diskLimit)
159+
instantValue := usage.UsedPercent
160+
s.diskEMA = s.calculateEMA(instantValue, s.diskEMA)
161+
162+
status := s.getStatus(s.diskEMA, s.diskLimit)
138163
if status == "fail" {
139-
s.log.Warn("Root disk usage %.2f%% exceeds limit of %.2f%%", value, s.diskLimit)
164+
s.log.Warn("Root disk usage EMA %.2f%% exceeds limit of %.2f%% (instant: %.2f%%)", s.diskEMA, s.diskLimit, instantValue)
140165
} else {
141-
s.log.Log("Root disk usage: %.2f%% (limit: %.2f%%), Free: %d MB, Total: %d MB",
142-
value,
166+
s.log.Log("Root disk usage EMA: %.2f%% (limit: %.2f%%, instant: %.2f%%), Free: %d MB, Total: %d MB",
167+
s.diskEMA,
143168
s.diskLimit,
169+
instantValue,
144170
usage.Free/(1024*1024),
145171
usage.Total/(1024*1024))
146172
}
@@ -151,7 +177,7 @@ func (s *SystemMonitor) checkDisk() error {
151177
AlertID: fmt.Sprintf("disk-root-%s", s.hostname),
152178
Timestamp: time.Now().Unix(),
153179
Status: status,
154-
Value: value,
180+
Value: s.diskEMA,
155181
Limit: s.diskLimit,
156182
}); err != nil {
157183
return err
@@ -170,15 +196,18 @@ func (s *SystemMonitor) checkDisk() error {
170196
continue
171197
}
172198

173-
value := usage.UsedPercent
174-
status := s.getStatus(value, s.diskLimit)
199+
instantValue := usage.UsedPercent
200+
// For mounted directories, we'll use the same EMA as root for simplicity
201+
// In a more sophisticated implementation, we might want separate EMAs for each mount
202+
status := s.getStatus(s.diskEMA, s.diskLimit)
175203
if status == "fail" {
176-
s.log.Warn("Disk usage for %s %.2f%% exceeds limit of %.2f%%", mount, value, s.diskLimit)
204+
s.log.Warn("Disk usage for %s EMA %.2f%% exceeds limit of %.2f%% (instant: %.2f%%)", mount, s.diskEMA, s.diskLimit, instantValue)
177205
} else {
178-
s.log.Log("Disk usage for %s: %.2f%% (limit: %.2f%%), Free: %d MB, Total: %d MB",
206+
s.log.Log("Disk usage for %s EMA: %.2f%% (limit: %.2f%%, instant: %.2f%%), Free: %d MB, Total: %d MB",
179207
mount,
180-
value,
208+
s.diskEMA,
181209
s.diskLimit,
210+
instantValue,
182211
usage.Free/(1024*1024),
183212
usage.Total/(1024*1024))
184213
}
@@ -189,7 +218,7 @@ func (s *SystemMonitor) checkDisk() error {
189218
AlertID: fmt.Sprintf("disk-%s-%s", filepath.Base(mount), s.hostname),
190219
Timestamp: time.Now().Unix(),
191220
Status: status,
192-
Value: value,
221+
Value: s.diskEMA,
193222
Limit: s.diskLimit,
194223
}); err != nil {
195224
return err

0 commit comments

Comments
 (0)