From 0537658c8283c0a138aa9660dc3fe1e97cb9424f Mon Sep 17 00:00:00 2001 From: Vanshikav123 Date: Sat, 21 Jun 2025 23:24:29 +0530 Subject: [PATCH] Add container memory page faults metric (major/minor) Signed-off-by: Vanshikav123 --- cgroup/memory.go | 24 +++++++++++++++--------- containers/container.go | 4 ++++ containers/metrics.go | 18 ++++++++++-------- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/cgroup/memory.go b/cgroup/memory.go index cb2e6ca..bb0df84 100644 --- a/cgroup/memory.go +++ b/cgroup/memory.go @@ -9,9 +9,11 @@ import ( const maxMemory = 1 << 62 type MemoryStat struct { - RSS uint64 - Cache uint64 - Limit uint64 + RSS uint64 + Cache uint64 + Limit uint64 + PgFault uint64 + PgMajFault uint64 } func (cg *Cgroup) MemoryStat() *MemoryStat { @@ -48,9 +50,11 @@ func (cg *Cgroup) memoryStatV1() (*MemoryStat, error) { // mapped_file is accounted only when the memory cgroup is owner of page // cache.) return &MemoryStat{ - RSS: vars["rss"] + vars["mapped_file"], - Cache: vars["cache"], - Limit: limit, + RSS: vars["rss"] + vars["mapped_file"], + Cache: vars["cache"], + Limit: limit, + PgFault: vars["pgfault"], + PgMajFault: vars["pgmajfault"], }, nil } @@ -64,8 +68,10 @@ func (cg *Cgroup) memoryStatV2() (*MemoryStat, error) { } limit, _ := common.ReadUintFromFile(path.Join(cg2Root, cg.subsystems[""], "memory.max")) return &MemoryStat{ - RSS: vars["anon"] + vars["file_mapped"], - Cache: vars["file"], - Limit: limit, + RSS: vars["anon"] + vars["file_mapped"], + Cache: vars["file"], + Limit: limit, + PgFault: vars["pgfault"], + PgMajFault: vars["pgmajfault"], }, nil } diff --git a/containers/container.go b/containers/container.go index a35eb4c..9c01d97 100644 --- a/containers/container.go +++ b/containers/container.go @@ -263,6 +263,10 @@ func (c *Container) Collect(ch chan<- prometheus.Metric) { if s.Limit > 0 { ch <- gauge(metrics.MemoryLimit, float64(s.Limit)) } + if s.PgFault > 0 { + ch <- counter(metrics.MemoryPageFaults, float64(s.PgMajFault), "major") + ch <- 
counter(metrics.MemoryPageFaults, float64(s.PgFault-s.PgMajFault), "minor") + } } if c.oomKills > 0 { diff --git a/containers/metrics.go b/containers/metrics.go index 2d0df76..fea7043 100644 --- a/containers/metrics.go +++ b/containers/metrics.go @@ -14,10 +14,11 @@ var metrics = struct { CPUDelay *prometheus.Desc ThrottledTime *prometheus.Desc - MemoryLimit *prometheus.Desc - MemoryRss *prometheus.Desc - MemoryCache *prometheus.Desc - OOMKills *prometheus.Desc + MemoryLimit *prometheus.Desc + MemoryRss *prometheus.Desc + MemoryCache *prometheus.Desc + OOMKills *prometheus.Desc + MemoryPageFaults *prometheus.Desc DiskDelay *prometheus.Desc DiskSize *prometheus.Desc @@ -65,10 +66,11 @@ var metrics = struct { CPUDelay: metric("container_resources_cpu_delay_seconds_total", "Total time duration processes of the container have been waiting for a CPU (while being runnable)"), ThrottledTime: metric("container_resources_cpu_throttled_seconds_total", "Total time duration the container has been throttled"), - MemoryLimit: metric("container_resources_memory_limit_bytes", "Memory limit of the container"), - MemoryRss: metric("container_resources_memory_rss_bytes", "Amount of physical memory used by the container (doesn't include page cache)"), - MemoryCache: metric("container_resources_memory_cache_bytes", "Amount of page cache memory allocated by the container"), - OOMKills: metric("container_oom_kills_total", "Total number of times the container was terminated by the OOM killer"), + MemoryLimit: metric("container_resources_memory_limit_bytes", "Memory limit of the container"), + MemoryRss: metric("container_resources_memory_rss_bytes", "Amount of physical memory used by the container (doesn't include page cache)"), + MemoryCache: metric("container_resources_memory_cache_bytes", "Amount of page cache memory allocated by the container"), + OOMKills: metric("container_oom_kills_total", "Total number of times the container was terminated by the OOM killer"), + MemoryPageFaults: 
metric("container_resources_memory_page_faults_total", "Total number of page faults by the container", "type"), DiskDelay: metric("container_resources_disk_delay_seconds_total", "Total time duration processes of the container have been waiting fot I/Os to complete"), DiskSize: metric("container_resources_disk_size_bytes", "Total capacity of the volume", "mount_point", "device", "volume"),