From e3511a0ac0e7ac14a538ec4ff3191e38009f920c Mon Sep 17 00:00:00 2001 From: Yuqi Yan Date: Fri, 17 Oct 2025 13:50:16 -0700 Subject: [PATCH 1/2] GCInspector should use different thresholds on GC events --- CHANGES.txt | 1 + conf/cassandra.yaml | 16 +++ conf/cassandra_latest.yaml | 16 +++ .../org/apache/cassandra/config/Config.java | 2 + .../cassandra/config/DatabaseDescriptor.java | 21 ++++ .../apache/cassandra/service/GCInspector.java | 82 ++++++++++++-- .../cassandra/service/GCInspectorMXBean.java | 4 + .../cassandra/service/GCInspectorTest.java | 104 +++++++++++++++--- 8 files changed, 220 insertions(+), 26 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 9bca6a4bdee0..dac5d5450675 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.1 + * GCInspector should use different thresholds on GC events for warning messages (CASSANDRA-20980) * Add cqlsh autocompletion for the identity mapping feature (CASSANDRA-20021) * Add DDL Guardrail enabling administrators to disallow creation/modification of keyspaces with durable_writes = false (CASSANDRA-20913) * Optimize Counter, Meter and Histogram metrics using thread local counters (CASSANDRA-20250) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index 0147adc74b6a..e90b64bb5cfe 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2048,6 +2048,11 @@ batch_size_fail_threshold: 50KiB # Log WARN on any batches not of type LOGGED than span across more partitions than this limit unlogged_batch_across_partitions_warn_threshold: 10 +# GCInspector configs: +# For GC like ShenandoahGC/ZGC etc., there are GC events that do not have STW pauses (Concurrent phases) +# Operator might find it reasonable to use lower thresholds for events require STW pauses and higher thresholds +# for concurrent phases. +# # GC Pauses greater than 200 ms will be logged at INFO level # This threshold can be adjusted to minimize logging if necessary # Min unit: ms @@ -2059,6 +2064,17 @@ unlogged_batch_across_partitions_warn_threshold: 10 # Min unit: ms # gc_warn_threshold: 1000ms +# GC Concurrent phase greater than 200 ms will be logged at INFO level +# This threshold can be adjusted to minimize logging if necessary +# Min unit: ms +# gc_concurrent_phase_log_threshold: 1000ms + +# GC Concurrent phase than gc_concurrent_phase_warn_threshold will be logged at WARN level +# Adjust the threshold based on your application throughput requirement. Setting to 0 +# will deactivate the feature. +# Min unit: ms +# gc_concurrent_phase_warn_threshold: 2000ms + # Maximum size of any value in SSTables. Safety measure to detect SSTable corruption # early. Any value size larger than this threshold will result into marking an SSTable # as corrupted. This should be positive and less than 2GiB. diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index f42604843146..8474ce16d854 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -1913,6 +1913,11 @@ batch_size_fail_threshold: 50KiB # Log WARN on any batches not of type LOGGED than span across more partitions than this limit unlogged_batch_across_partitions_warn_threshold: 10 +# GCInspector configs: +# For GC like ShenandoahGC/ZGC etc., there are GC events that do not have STW pauses. +# Such events are called Concurrent phases. Operator might find it reasonable to use lower thresholds +# for events require STW pauses and higher thresholds for concurrent phases. +# # GC Pauses greater than 200 ms will be logged at INFO level # This threshold can be adjusted to minimize logging if necessary # Min unit: ms @@ -1924,6 +1929,17 @@ unlogged_batch_across_partitions_warn_threshold: 10 # Min unit: ms # gc_warn_threshold: 1000ms +# GC Concurrent phase greater than 200 ms will be logged at INFO level +# This threshold can be adjusted to minimize logging if necessary +# Min unit: ms +# gc_concurrent_phase_log_threshold: 1000ms + +# GC Concurrent phase than gc_concurrent_phase_warn_threshold will be logged at WARN level +# Adjust the threshold based on your application throughput requirement. Setting to 0 +# will deactivate the feature. +# Min unit: ms +# gc_concurrent_phase_warn_threshold: 2000ms + # Maximum size of any value in SSTables. Safety measure to detect SSTable corruption # early. Any value size larger than this threshold will result into marking an SSTable # as corrupted. This should be positive and less than 2GiB. diff --git a/src/java/org/apache/cassandra/config/Config.java b/src/java/org/apache/cassandra/config/Config.java index 9783e086ad13..781248aa6f15 100644 --- a/src/java/org/apache/cassandra/config/Config.java +++ b/src/java/org/apache/cassandra/config/Config.java @@ -582,6 +582,8 @@ public static class SSTableConfig public volatile DurationSpec.IntMillisecondsBound gc_log_threshold = new DurationSpec.IntMillisecondsBound("200ms"); @Replaces(oldName = "gc_warn_threshold_in_ms", converter = Converters.MILLIS_DURATION_INT, deprecated = true) public volatile DurationSpec.IntMillisecondsBound gc_warn_threshold = new DurationSpec.IntMillisecondsBound("1s"); + public volatile DurationSpec.IntMillisecondsBound gc_concurrent_phase_log_threshold = new DurationSpec.IntMillisecondsBound("1s"); + public volatile DurationSpec.IntMillisecondsBound gc_concurrent_phase_warn_threshold = new DurationSpec.IntMillisecondsBound("2s"); // TTL for different types of trace events. @Replaces(oldName = "tracetype_query_ttl", converter = Converters.SECONDS_DURATION, deprecated=true) diff --git a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java index ec76193e1046..64448242763e 100644 --- a/src/java/org/apache/cassandra/config/DatabaseDescriptor.java +++ b/src/java/org/apache/cassandra/config/DatabaseDescriptor.java @@ -4698,6 +4698,27 @@ public static void setGCWarnThreshold(int threshold) conf.gc_warn_threshold = new DurationSpec.IntMillisecondsBound(threshold); } + public static int getGCConcurrentPhaseLogThreshold() + { + return conf.gc_concurrent_phase_log_threshold.toMilliseconds(); + } + + public static void setGCConcurrentPhaseLogThreshold(int threshold) + { + conf.gc_concurrent_phase_log_threshold = new DurationSpec.IntMillisecondsBound(threshold); + } + + + public static int getGCConcurrentPhaseWarnThreshold() + { + return conf.gc_concurrent_phase_warn_threshold.toMilliseconds(); + } + + public static void setGCConcurrentPhaseWarnThreshold(int threshold) + { + conf.gc_concurrent_phase_warn_threshold = new DurationSpec.IntMillisecondsBound(threshold); + } + public static boolean isCDCEnabled() { return conf.cdc_enabled; diff --git a/src/java/org/apache/cassandra/service/GCInspector.java b/src/java/org/apache/cassandra/service/GCInspector.java index 4bdc3a05bade..f121f6c5487b 100644 --- a/src/java/org/apache/cassandra/service/GCInspector.java +++ b/src/java/org/apache/cassandra/service/GCInspector.java @@ -287,16 +287,28 @@ public void handleNotification(final Notification notification, final Object han if (state.compareAndSet(prev, new State(duration, bytes, prev))) break; } - - if (getGcWarnThresholdInMs() != 0 && duration > getGcWarnThresholdInMs()) - logger.warn(sb.toString()); - else if (duration > getGcLogThresholdInMs()) - logger.info(sb.toString()); - else if (logger.isTraceEnabled()) - logger.trace(sb.toString()); - if (duration > this.getStatusThresholdInMs()) - StatusLogger.log(); + if (isConcurrentPhase(info.getGcCause(), info.getGcName())) + { + if (getGcWarnThresholdInMs() != 0 && duration > getGcWarnThresholdInMs()) + logger.warn(sb.toString()); + else if (duration > getGcLogThresholdInMs()) + logger.info(sb.toString()); + else if (logger.isTraceEnabled()) + logger.trace(sb.toString()); + // TODO: trigger StatusLogger if concurrent phases take too long? + } + else + { + if (getGcConcurrentPhaseWarnThresholdInMs() != 0 && duration > getGcConcurrentPhaseWarnThresholdInMs()) + logger.warn(sb.toString()); + else if (duration > getGcConcurrentPhaseLogThresholdInMs()) + logger.info(sb.toString()); + else if (logger.isTraceEnabled()) + logger.trace(sb.toString()); + if (duration > this.getStatusThresholdInMs()) + StatusLogger.log(); + } // if we just finished an old gen collection and we're still using a lot of memory, try to reduce the pressure if (gcState.assumeGCIsOldGen) @@ -304,6 +316,24 @@ else if (logger.isTraceEnabled()) } } + static boolean isConcurrentPhase(String cause, String name) { + // Mostly taken from: https://github.com/Netflix/spectator/blob/v1.7.x/spectator-ext-gc/src/main/java/com/netflix/spectator/gc/GcLogger.java + // So far the only indicator known is that the cause will be reported as "No GC" + // when using CMS. + // + // For ZGC, behavior was changed in JDK17: https://bugs.openjdk.java.net/browse/JDK-8265136 + // For ZGC in older versions, there is no way to accurately get the amount of time + // in STW pauses. + // + // For G1, a new bean is added in JDK20 to indicate time spent in concurrent phases: + // https://bugs.openjdk.org/browse/JDK-8297247 + + return "No GC".equals(cause) // CMS + || "G1 Concurrent GC".equals(name) // G1 in JDK20+ + || name.endsWith(" Cycles"); // Shenandoah, ZGC + } + + public State getTotalSinceLastCheck() { return state.getAndSet(new State()); @@ -407,6 +437,40 @@ public void setGcLogThresholdInMs(long threshold) DatabaseDescriptor.setGCLogThreshold((int) threshold); } + public int getGcConcurrentPhaseWarnThresholdInMs() + { + return DatabaseDescriptor.getGCConcurrentPhaseWarnThreshold(); + } + + public void setGcConcurrentPhaseWarnThresholdInMs(int threshold) + { + long gcConcurrentPhaseLogThresholdInMs = getGcConcurrentPhaseLogThresholdInMs(); + if (threshold < 0) + throw new IllegalArgumentException("Threshold must be greater than or equal to 0"); + if (threshold != 0 && threshold <= gcConcurrentPhaseLogThresholdInMs) + throw new IllegalArgumentException("Threshold must be greater than gcConcurrentPhaseLogThresholdInMs which is currently " + + gcConcurrentPhaseLogThresholdInMs); + DatabaseDescriptor.setGCConcurrentPhaseWarnThreshold(threshold); + } + + public int getGcConcurrentPhaseLogThresholdInMs() + { + return DatabaseDescriptor.getGCConcurrentPhaseLogThreshold(); + } + + public void setGcConcurrentPhaseLogThresholdInMs(int threshold) + { + if (threshold <= 0) + throw new IllegalArgumentException("Threshold must be greater than 0"); + + long gcConcurrentPhaseWarnThresholdInMs = getGcConcurrentPhaseWarnThresholdInMs(); + if (gcConcurrentPhaseWarnThresholdInMs != 0 && threshold > gcConcurrentPhaseWarnThresholdInMs) + throw new IllegalArgumentException("Threshold must be less than gcConcurrentPhaseWarnThresholdInMs which is currently " + + gcConcurrentPhaseWarnThresholdInMs); + + DatabaseDescriptor.setGCConcurrentPhaseLogThreshold(threshold); + } + public long getGcLogThresholdInMs() { return DatabaseDescriptor.getGCLogThreshold(); diff --git a/src/java/org/apache/cassandra/service/GCInspectorMXBean.java b/src/java/org/apache/cassandra/service/GCInspectorMXBean.java index 08795ed269de..9765e9813c3c 100644 --- a/src/java/org/apache/cassandra/service/GCInspectorMXBean.java +++ b/src/java/org/apache/cassandra/service/GCInspectorMXBean.java @@ -27,6 +27,10 @@ public interface GCInspectorMXBean void setGcWarnThresholdInMs(long threshold); long getGcWarnThresholdInMs(); void setGcLogThresholdInMs(long threshold); + int getGcConcurrentPhaseLogThresholdInMs(); + void setGcConcurrentPhaseWarnThresholdInMs(int threshold); + int getGcConcurrentPhaseWarnThresholdInMs(); + void setGcConcurrentPhaseLogThresholdInMs(int threshold); long getGcLogThresholdInMs(); long getStatusThresholdInMs(); } diff --git a/test/unit/org/apache/cassandra/service/GCInspectorTest.java b/test/unit/org/apache/cassandra/service/GCInspectorTest.java index bcdf023294ad..5ccaf5e2e93b 100644 --- a/test/unit/org/apache/cassandra/service/GCInspectorTest.java +++ b/test/unit/org/apache/cassandra/service/GCInspectorTest.java @@ -18,11 +18,16 @@ package org.apache.cassandra.service; import org.apache.cassandra.config.DatabaseDescriptor; -import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + + public class GCInspectorTest { @@ -43,38 +48,81 @@ public void before() @Test public void ensureStaticFieldsHydrateFromConfig() { - Assert.assertEquals(DatabaseDescriptor.getGCLogThreshold(), gcInspector.getGcLogThresholdInMs()); - Assert.assertEquals(DatabaseDescriptor.getGCWarnThreshold(), gcInspector.getGcWarnThresholdInMs()); + assertEquals(DatabaseDescriptor.getGCLogThreshold(), gcInspector.getGcLogThresholdInMs()); + assertEquals(DatabaseDescriptor.getGCWarnThreshold(), gcInspector.getGcWarnThresholdInMs()); + assertEquals(DatabaseDescriptor.getGCConcurrentPhaseLogThreshold(), gcInspector.getGcConcurrentPhaseLogThresholdInMs()); + assertEquals(DatabaseDescriptor.getGCConcurrentPhaseWarnThreshold(), gcInspector.getGcConcurrentPhaseWarnThresholdInMs()); } @Test public void ensureStatusIsCalculated() { - Assert.assertTrue(gcInspector.getStatusThresholdInMs() > 0); + assertTrue(gcInspector.getStatusThresholdInMs() > 0); } - @Test(expected=IllegalArgumentException.class) + @Test public void ensureWarnGreaterThanLog() { - gcInspector.setGcWarnThresholdInMs(gcInspector.getGcLogThresholdInMs()); + try + { + gcInspector.setGcWarnThresholdInMs(gcInspector.getGcLogThresholdInMs()); + fail("Expect IllegalArgumentException"); + } + catch (IllegalArgumentException e) + { + // expected + } + try + { + gcInspector.setGcConcurrentPhaseWarnThresholdInMs(gcInspector.getGcConcurrentPhaseLogThresholdInMs()); + fail("Expect IllegalArgumentException"); + } + catch (IllegalArgumentException e) + { + // expected + } } @Test public void ensureZeroIsOk() { gcInspector.setGcWarnThresholdInMs(0); - Assert.assertEquals(gcInspector.getStatusThresholdInMs(), gcInspector.getGcLogThresholdInMs()); - Assert.assertEquals(0, DatabaseDescriptor.getGCWarnThreshold()); - Assert.assertEquals(200, DatabaseDescriptor.getGCLogThreshold()); + assertEquals(gcInspector.getStatusThresholdInMs(), gcInspector.getGcLogThresholdInMs()); + assertEquals(0, DatabaseDescriptor.getGCWarnThreshold()); + assertEquals(200, DatabaseDescriptor.getGCLogThreshold()); + + gcInspector.setGcConcurrentPhaseWarnThresholdInMs(0); + assertEquals(0, DatabaseDescriptor.getGCConcurrentPhaseWarnThreshold()); + assertEquals(1000, DatabaseDescriptor.getGCConcurrentPhaseLogThreshold()); } - @Test(expected=IllegalArgumentException.class) + @Test public void ensureLogLessThanWarn() { - Assert.assertEquals(200, gcInspector.getGcLogThresholdInMs()); + assertEquals(200, gcInspector.getGcLogThresholdInMs()); gcInspector.setGcWarnThresholdInMs(1000); - Assert.assertEquals(1000, gcInspector.getGcWarnThresholdInMs()); - gcInspector.setGcLogThresholdInMs(gcInspector.getGcWarnThresholdInMs() + 1); + assertEquals(1000, gcInspector.getGcWarnThresholdInMs()); + try + { + gcInspector.setGcLogThresholdInMs(gcInspector.getGcWarnThresholdInMs() + 1); + fail("Expect IllegalArgumentException"); + } + catch (IllegalArgumentException e) + { + // expected + } + assertEquals(1000, gcInspector.getGcConcurrentPhaseLogThresholdInMs()); + gcInspector.setGcConcurrentPhaseWarnThresholdInMs(2000); + assertEquals(2000, gcInspector.getGcConcurrentPhaseWarnThresholdInMs()); + try + { + gcInspector.setGcConcurrentPhaseLogThresholdInMs(gcInspector.getGcConcurrentPhaseWarnThresholdInMs() + 1); + fail("Expect IllegalArgumentException"); + } + catch (IllegalArgumentException e) + { + // expected + } } @Test @@ -82,10 +130,16 @@ public void testDefaults() { gcInspector.setGcLogThresholdInMs(200); gcInspector.setGcWarnThresholdInMs(1000); - Assert.assertEquals(200, DatabaseDescriptor.getGCLogThreshold()); - Assert.assertEquals(200, gcInspector.getGcLogThresholdInMs()); - Assert.assertEquals(1000, DatabaseDescriptor.getGCWarnThreshold()); - Assert.assertEquals(1000, gcInspector.getGcWarnThresholdInMs()); + gcInspector.setGcConcurrentPhaseLogThresholdInMs(1000); + gcInspector.setGcConcurrentPhaseWarnThresholdInMs(2000); + assertEquals(200, DatabaseDescriptor.getGCLogThreshold()); + assertEquals(200, gcInspector.getGcLogThresholdInMs()); + assertEquals(1000, DatabaseDescriptor.getGCWarnThreshold()); + assertEquals(1000, gcInspector.getGcWarnThresholdInMs()); + assertEquals(1000, DatabaseDescriptor.getGCConcurrentPhaseLogThreshold()); + assertEquals(1000, gcInspector.getGcConcurrentPhaseLogThresholdInMs()); + assertEquals(2000, DatabaseDescriptor.getGCConcurrentPhaseWarnThreshold()); + assertEquals(2000, gcInspector.getGcConcurrentPhaseWarnThresholdInMs()); } @Test(expected=IllegalArgumentException.class) @@ -94,4 +148,20 @@ public void testMaxValue() gcInspector.setGcLogThresholdInMs(200); gcInspector.setGcWarnThresholdInMs(Integer.MAX_VALUE+1L); } + + @Test + public void testIsConcurrentPhase() + { + assertTrue("No GC cause should be considered concurrent", + GCInspector.isConcurrentPhase("No GC", "SomeGCName")); + assertTrue("Shenandoah Cycles should be considered concurrent", + GCInspector.isConcurrentPhase("SomeCause", "Shenandoah Cycles")); + assertTrue("ZGC Cycles should be considered concurrent", + GCInspector.isConcurrentPhase("SomeCause", "ZGC Cycles")); + + assertFalse("ParallelGC should not be considered concurrent", + GCInspector.isConcurrentPhase("Allocation Failure", "PS Scavenge")); + assertFalse("G1 Young Generation should not be considered concurrent", + GCInspector.isConcurrentPhase("G1 Evacuation Pause", "G1 Young Generation")); + } } From cc31992bae5fd36b7960bd5f2af9532827237362 Mon Sep 17 00:00:00 2001 From: Yuqi Yan Date: Mon, 20 Oct 2025 08:10:17 -0700 Subject: [PATCH 2/2] upstream fix --- conf/cassandra.yaml | 2 +- conf/cassandra_latest.yaml | 2 +- src/java/org/apache/cassandra/service/GCInspector.java | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml index e90b64bb5cfe..70f00a421daa 100644 --- a/conf/cassandra.yaml +++ b/conf/cassandra.yaml @@ -2064,7 +2064,7 @@ unlogged_batch_across_partitions_warn_threshold: 10 # Min unit: ms # gc_warn_threshold: 1000ms -# GC Concurrent phase greater than 200 ms will be logged at INFO level +# GC Concurrent phase greater than 1000 ms will be logged at INFO level # This threshold can be adjusted to minimize logging if necessary # Min unit: ms # gc_concurrent_phase_log_threshold: 1000ms diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml index 8474ce16d854..facce1b0f90b 100644 --- a/conf/cassandra_latest.yaml +++ b/conf/cassandra_latest.yaml @@ -1929,7 +1929,7 @@ unlogged_batch_across_partitions_warn_threshold: 10 # Min unit: ms # gc_warn_threshold: 1000ms -# GC Concurrent phase greater than 200 ms will be logged at INFO level +# GC Concurrent phase greater than 1000 ms will be logged at INFO level # This threshold can be adjusted to minimize logging if necessary # Min unit: ms # gc_concurrent_phase_log_threshold: 1000ms diff --git a/src/java/org/apache/cassandra/service/GCInspector.java b/src/java/org/apache/cassandra/service/GCInspector.java index f121f6c5487b..8d5e2c4971cc 100644 --- a/src/java/org/apache/cassandra/service/GCInspector.java +++ b/src/java/org/apache/cassandra/service/GCInspector.java @@ -290,9 +290,9 @@ public void handleNotification(final Notification notification, final Object han if (isConcurrentPhase(info.getGcCause(), info.getGcName())) { - if (getGcWarnThresholdInMs() != 0 && duration > getGcWarnThresholdInMs()) + if (getGcConcurrentPhaseWarnThresholdInMs() != 0 && duration > getGcConcurrentPhaseWarnThresholdInMs()) logger.warn(sb.toString()); - else if (duration > getGcLogThresholdInMs()) + else if (duration > getGcConcurrentPhaseLogThresholdInMs()) logger.info(sb.toString()); else if (logger.isTraceEnabled()) logger.trace(sb.toString()); @@ -300,9 +300,9 @@ else if (logger.isTraceEnabled()) } else { - if (getGcConcurrentPhaseWarnThresholdInMs() != 0 && duration > getGcConcurrentPhaseWarnThresholdInMs()) + if (getGcWarnThresholdInMs() != 0 && duration > getGcWarnThresholdInMs()) logger.warn(sb.toString()); - else if (duration > getGcConcurrentPhaseLogThresholdInMs()) + else if (duration > getGcLogThresholdInMs()) logger.info(sb.toString()); else if (logger.isTraceEnabled()) logger.trace(sb.toString());