@@ -34,6 +34,12 @@ type SystemMonitor struct {
3434 diskLimit float64
3535 interval int
3636 log * Logger
37+
38+ // EMA tracking
39+ cpuEMA float64
40+ memoryEMA float64
41+ diskEMA float64
42+ alpha float64 // EMA smoothing factor
3743}
3844
3945func NewSystemMonitor (betterStackURL string , interval int , cpuLimit , memoryLimit , diskLimit float64 ) (* SystemMonitor , error ) {
@@ -42,6 +48,12 @@ func NewSystemMonitor(betterStackURL string, interval int, cpuLimit, memoryLimit
4248 return nil , fmt .Errorf ("failed to get hostname: %v" , err )
4349 }
4450
51+ // Calculate alpha based on interval to get roughly 5 minutes of smoothing
52+ // EMA formula: alpha = 2/(N+1) where N is the number of periods
53+ // For 5 minutes (300 seconds) of smoothing: N = 300/interval. Alpha is a valid smoothing factor, in (0, 1], only while 0 < interval <= 300.
54+ N := float64 (300 ) / float64 (interval )
55+ alpha := 2.0 / (N + 1.0 )
56+
4557 return & SystemMonitor {
4658 httpClient : & http.Client {
4759 Timeout : 5 * time .Second ,
@@ -53,9 +65,14 @@ func NewSystemMonitor(betterStackURL string, interval int, cpuLimit, memoryLimit
5365 diskLimit : diskLimit ,
5466 interval : interval ,
5567 log : New (),
68+ alpha : alpha ,
5669 }, nil
5770}
5871
72+ func (s * SystemMonitor ) calculateEMA (currentValue , previousEMA float64 ) float64 {
73+ return s .alpha * currentValue + (1 - s .alpha )* previousEMA
74+ }
75+
5976func (s * SystemMonitor ) checkCPU () error {
6077 duration := float64 (s .interval ) / 10
6178 if duration < 5 {
@@ -74,12 +91,15 @@ func (s *SystemMonitor) checkCPU() error {
7491 return nil
7592 }
7693
77- value := cpuPercent [0 ]
78- status := s .getStatus (value , s .cpuLimit )
94+ // Calculate EMA for CPU usage
95+ instantValue := cpuPercent [0 ]
96+ s .cpuEMA = s .calculateEMA (instantValue , s .cpuEMA )
97+
98+ status := s .getStatus (s .cpuEMA , s .cpuLimit )
7999 if status == "fail" {
80- s .log .Warn ("CPU usage %.2f%% exceeds limit of %.2f%%" , value , s .cpuLimit )
100+ s .log .Warn ("CPU usage EMA %.2f%% exceeds limit of %.2f%% (instant: %.2f%%) " , s . cpuEMA , s .cpuLimit , instantValue )
81101 } else {
82- s .log .Log ("CPU usage: %.2f%% (limit: %.2f%%)" , value , s .cpuLimit )
102+ s .log .Log ("CPU usage EMA : %.2f%% (limit: %.2f%%, instant: %.2f%% )" , s . cpuEMA , s .cpuLimit , instantValue )
83103 }
84104
85105 metric := Metric {
@@ -88,7 +108,7 @@ func (s *SystemMonitor) checkCPU() error {
88108 AlertID : fmt .Sprintf ("cpu-%s" , s .hostname ),
89109 Timestamp : time .Now ().Unix (),
90110 Status : status ,
91- Value : value ,
111+ Value : s . cpuEMA ,
92112 Limit : s .cpuLimit ,
93113 }
94114
@@ -101,14 +121,17 @@ func (s *SystemMonitor) checkMemory() error {
101121 return fmt .Errorf ("failed to get memory stats: %v" , err )
102122 }
103123
104- value := vmStat .UsedPercent
105- status := s .getStatus (value , s .memoryLimit )
124+ instantValue := vmStat .UsedPercent
125+ s .memoryEMA = s .calculateEMA (instantValue , s .memoryEMA )
126+
127+ status := s .getStatus (s .memoryEMA , s .memoryLimit )
106128 if status == "fail" {
107- s .log .Warn ("Memory usage %.2f%% exceeds limit of %.2f%%" , value , s .memoryLimit )
129+ s .log .Warn ("Memory usage EMA %.2f%% exceeds limit of %.2f%% (instant: %.2f%%) " , s . memoryEMA , s .memoryLimit , instantValue )
108130 } else {
109- s .log .Log ("Memory usage: %.2f%% (limit: %.2f%%), Available: %d MB, Total: %d MB" ,
110- value ,
131+ s .log .Log ("Memory usage EMA : %.2f%% (limit: %.2f%%, instant : %.2f%%), Available: %d MB, Total: %d MB" ,
132+ s . memoryEMA ,
111133 s .memoryLimit ,
134+ instantValue ,
112135 vmStat .Available / (1024 * 1024 ),
113136 vmStat .Total / (1024 * 1024 ))
114137 }
@@ -119,7 +142,7 @@ func (s *SystemMonitor) checkMemory() error {
119142 AlertID : fmt .Sprintf ("memory-%s" , s .hostname ),
120143 Timestamp : time .Now ().Unix (),
121144 Status : status ,
122- Value : value ,
145+ Value : s . memoryEMA ,
123146 Limit : s .memoryLimit ,
124147 }
125148
@@ -133,14 +156,17 @@ func (s *SystemMonitor) checkDisk() error {
133156 return fmt .Errorf ("failed to get disk usage: %v" , err )
134157 }
135158
136- value := usage .UsedPercent
137- status := s .getStatus (value , s .diskLimit )
159+ instantValue := usage .UsedPercent
160+ s .diskEMA = s .calculateEMA (instantValue , s .diskEMA )
161+
162+ status := s .getStatus (s .diskEMA , s .diskLimit )
138163 if status == "fail" {
139- s .log .Warn ("Root disk usage %.2f%% exceeds limit of %.2f%%" , value , s .diskLimit )
164+ s .log .Warn ("Root disk usage EMA %.2f%% exceeds limit of %.2f%% (instant: %.2f%%) " , s . diskEMA , s .diskLimit , instantValue )
140165 } else {
141- s .log .Log ("Root disk usage: %.2f%% (limit: %.2f%%), Free: %d MB, Total: %d MB" ,
142- value ,
166+ s .log .Log ("Root disk usage EMA : %.2f%% (limit: %.2f%%, instant : %.2f%%), Free: %d MB, Total: %d MB" ,
167+ s . diskEMA ,
143168 s .diskLimit ,
169+ instantValue ,
144170 usage .Free / (1024 * 1024 ),
145171 usage .Total / (1024 * 1024 ))
146172 }
@@ -151,7 +177,7 @@ func (s *SystemMonitor) checkDisk() error {
151177 AlertID : fmt .Sprintf ("disk-root-%s" , s .hostname ),
152178 Timestamp : time .Now ().Unix (),
153179 Status : status ,
154- Value : value ,
180+ Value : s . diskEMA ,
155181 Limit : s .diskLimit ,
156182 }); err != nil {
157183 return err
@@ -170,15 +196,18 @@ func (s *SystemMonitor) checkDisk() error {
170196 continue
171197 }
172198
173- value := usage .UsedPercent
174- status := s .getStatus (value , s .diskLimit )
199+ instantValue := usage .UsedPercent
200+ // NOTE: mounted directories reuse the root filesystem's EMA (s.diskEMA), so the status and the
201+ // reported Value below reflect root usage, not this mount's; only instantValue is per-mount. Each mount needs its own EMA to alert on its own usage.
202+ status := s .getStatus (s .diskEMA , s .diskLimit )
175203 if status == "fail" {
176- s .log .Warn ("Disk usage for %s %.2f%% exceeds limit of %.2f%%" , mount , value , s .diskLimit )
204+ s .log .Warn ("Disk usage for %s EMA %.2f%% exceeds limit of %.2f%% (instant: %.2f%%) " , mount , s . diskEMA , s .diskLimit , instantValue )
177205 } else {
178- s .log .Log ("Disk usage for %s: %.2f%% (limit: %.2f%%), Free: %d MB, Total: %d MB" ,
206+ s .log .Log ("Disk usage for %s EMA : %.2f%% (limit: %.2f%%, instant : %.2f%%), Free: %d MB, Total: %d MB" ,
179207 mount ,
180- value ,
208+ s . diskEMA ,
181209 s .diskLimit ,
210+ instantValue ,
182211 usage .Free / (1024 * 1024 ),
183212 usage .Total / (1024 * 1024 ))
184213 }
@@ -189,7 +218,7 @@ func (s *SystemMonitor) checkDisk() error {
189218 AlertID : fmt .Sprintf ("disk-%s-%s" , filepath .Base (mount ), s .hostname ),
190219 Timestamp : time .Now ().Unix (),
191220 Status : status ,
192- Value : value ,
221+ Value : s . diskEMA ,
193222 Limit : s .diskLimit ,
194223 }); err != nil {
195224 return err
0 commit comments