From 032b184a5bba01fdbc1fb16e9ec7603593ade728 Mon Sep 17 00:00:00 2001 From: Mark Goddard Date: Mon, 3 Jun 2024 13:36:31 +0100 Subject: [PATCH] Revert "Add alerts for low available swap space" --- .../kolla/config/prometheus/system.rules | 18 ------------------ etc/kayobe/stackhpc-monitoring.yml | 6 ------ ...for-swap-availability-75e28ed7f913d1ec.yaml | 13 ------------- 3 files changed, 37 deletions(-) delete mode 100644 releasenotes/notes/add-alerts-for-swap-availability-75e28ed7f913d1ec.yaml diff --git a/etc/kayobe/kolla/config/prometheus/system.rules b/etc/kayobe/kolla/config/prometheus/system.rules index 7981a5609..613368be6 100644 --- a/etc/kayobe/kolla/config/prometheus/system.rules +++ b/etc/kayobe/kolla/config/prometheus/system.rules @@ -24,24 +24,6 @@ groups: summary: "Prometheus exporter at {{ $labels.instance }} reports low memory" description: "Available memory is {{ $value }} GiB." - - alert: LowSwapSpace - expr: (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes) < {% endraw %}{{ alertmanager_node_free_swap_warning_threshold_ratio }}{% raw %} - for: 1m - labels: - severity: warning - annotations: - summary: "Swap space at {{ $labels.instance }} reports low memory" - description: "Available swap space is {{ $value | humanizePercentage }}. Running out of swap space causes OOM Kills." - - - alert: LowSwapSpace - expr: (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes) < {% endraw %}{{ alertmanager_node_free_swap_critical_threshold_ratio }}{% raw %} - for: 1m - labels: - severity: critical - annotations: - summary: "Swap space at {{ $labels.instance }} reports low memory" - description: "Available swap space is {{ $value | humanizePercentage }}. Running out of swap space causes OOM Kills." - - alert: HostOomKillDetected expr: increase(node_vmstat_oom_kill[5m]) > 0 for: 5m diff --git a/etc/kayobe/stackhpc-monitoring.yml b/etc/kayobe/stackhpc-monitoring.yml index d6d1c1a76..de12ed2ed 100644 --- a/etc/kayobe/stackhpc-monitoring.yml +++ b/etc/kayobe/stackhpc-monitoring.yml @@ -12,12 +12,6 @@ alertmanager_low_memory_threshold_gib: 5 # link. Change to false to disable this alert. alertmanager_warn_network_bond_single_link: true -# Threshold to trigger an LowSwapSpace alert on swap space depletion (ratio). -# When the ratio of free swap space is lower than each of these values, warning -# and critical alerts will be triggered respectively. -alertmanager_node_free_swap_warning_threshold_ratio: 0.25 -alertmanager_node_free_swap_critical_threshold_ratio: 0.1 - ############################################################################### # Exporter configuration diff --git a/releasenotes/notes/add-alerts-for-swap-availability-75e28ed7f913d1ec.yaml b/releasenotes/notes/add-alerts-for-swap-availability-75e28ed7f913d1ec.yaml deleted file mode 100644 index db5efb85c..000000000 --- a/releasenotes/notes/add-alerts-for-swap-availability-75e28ed7f913d1ec.yaml +++ /dev/null @@ -1,13 +0,0 @@ ---- -features: - - | - Added two alerts (Warning and critical) that are triggered when the ratio - of (free_swap_sppace / total_swap_space) is below thresholds. - Each threshold can be modified by alterting value of - ``alertmanager_node_free_swap_warning_threshold_ratio`` and - ``alertmanager_node_free_swap_critical_threshold_ratio``. - - Currently this solution has limitation of having one-size fits all policy. - This can cause unwanted alerts for the hosts which utilise swap heavily - Therefore it is recommended to tune the thresholds or apply silence rules - for the needs.