Skip to content

Commit 451445b

Browse files
Merge pull request #24 from strongbox/issue-1791
issue-1791: Improve performance by customizing Cassandra and JanusGraph settings
2 parents 7abd449 + 9632cb3 commit 451445b

File tree

2 files changed

+197
-56
lines changed

2 files changed

+197
-56
lines changed

strongbox-db-server/src/main/java/org/strongbox/db/server/CassandraEmbeddedProperties.java

Lines changed: 144 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
import org.apache.cassandra.config.ParameterizedClass;
1111
import org.apache.cassandra.exceptions.ConfigurationException;
1212

13-
public class CassandraEmbeddedProperties implements CassandraEmbeddedConfiguration
13+
public class CassandraEmbeddedProperties
14+
implements CassandraEmbeddedConfiguration
1415
{
1516

1617
private volatile static CassandraEmbeddedProperties instance;
@@ -33,60 +34,162 @@ private CassandraEmbeddedProperties(String storageFolder,
3334
Objects.isNull(instance);
3435

3536
Config configLocal = new Config();
37+
38+
// ------------------------
39+
// Generic configuration
40+
// ------------------------
3641
configLocal.cluster_name = "Test Cluster";
37-
configLocal.hinted_handoff_enabled = true;
38-
configLocal.max_hint_window_in_ms = 10800000; // 3 hours
39-
configLocal.hinted_handoff_throttle_in_kb = 1024;
40-
configLocal.max_hints_delivery_threads = 2;
41-
configLocal.hints_directory = String.format("%s/cassandra/hints", storageFolder);
42+
configLocal.snapshot_before_compaction = false;
43+
configLocal.auto_snapshot = false;
44+
45+
// ------------------------
46+
// Security
47+
// ------------------------
4248
configLocal.authenticator = "PasswordAuthenticator";
4349
configLocal.authorizer = "AllowAllAuthorizer";
4450
configLocal.permissions_validity_in_ms = 2000;
45-
configLocal.partitioner = "org.apache.cassandra.dht.Murmur3Partitioner";
46-
configLocal.data_file_directories = new String[] { String.format("%s/cassandra/data", storageFolder) };
47-
configLocal.commitlog_directory = String.format("%s/cassandra/commitlog", storageFolder);
48-
configLocal.cdc_raw_directory = String.format("%s/cassandra/cdc", storageFolder);
49-
configLocal.disk_failure_policy = DiskFailurePolicy.stop;
50-
configLocal.key_cache_save_period = 14400;
51-
configLocal.row_cache_size_in_mb = 0;
52-
configLocal.row_cache_save_period = 0;
53-
configLocal.saved_caches_directory = String.format("%s/cassandra/saved_caches", storageFolder);
54-
configLocal.commitlog_sync = CommitLogSync.periodic;
55-
configLocal.commitlog_sync_period_in_ms = 10000;
56-
configLocal.commitlog_segment_size_in_mb = 8;
57-
configLocal.max_mutation_size_in_kb = 4096;
58-
configLocal.seed_provider = new ParameterizedClass("org.apache.cassandra.locator.SimpleSeedProvider",
59-
Collections.singletonMap("seeds", "127.0.0.1"));
60-
configLocal.concurrent_reads = 32;
61-
configLocal.concurrent_writes = 32;
62-
configLocal.trickle_fsync = false;
63-
configLocal.trickle_fsync_interval_in_kb = 10240;
51+
52+
// ------------------------
53+
// Network
54+
// ------------------------
55+
configLocal.start_rpc = false; // disable thrift - it's deprecated and should not be used!
56+
configLocal.listen_address = "127.0.0.1";
6457
configLocal.storage_port = 7010;
6558
configLocal.ssl_storage_port = 7011;
66-
configLocal.listen_address = "127.0.0.1";
67-
6859
configLocal.start_native_transport = true;
6960
configLocal.native_transport_port = port;
61+
configLocal.native_transport_max_threads = 256;
7062

71-
configLocal.start_rpc = false;
72-
configLocal.incremental_backups = false;
73-
configLocal.snapshot_before_compaction = false;
74-
configLocal.auto_snapshot = false;
75-
configLocal.column_index_size_in_kb = 64;
76-
configLocal.compaction_throughput_mb_per_sec = 16;
63+
// Coordinator related
7764
configLocal.read_request_timeout_in_ms = 5000;
7865
configLocal.range_request_timeout_in_ms = 10000;
7966
configLocal.write_request_timeout_in_ms = 2000;
8067
configLocal.cas_contention_timeout_in_ms = 1000;
8168
configLocal.truncate_request_timeout_in_ms = 60000;
8269
configLocal.request_timeout_in_ms = 10000;
8370
configLocal.cross_node_timeout = false;
71+
72+
// Addresses of hosts that are deemed contact points.
73+
// Cassandra nodes use this list of hosts to find each other and learn
74+
// the topology of the ring. You must change this if you are running multiple nodes!
75+
// - seeds is actually a comma-delimited list of addresses.
76+
// Ex: "seeds: <ip1>,<ip2>,<ip3>"
77+
configLocal.seed_provider = new ParameterizedClass("org.apache.cassandra.locator.SimpleSeedProvider",
78+
Collections.singletonMap("seeds", "127.0.0.1"));
79+
80+
// ------------------------
81+
// Storage locations
82+
// ------------------------
83+
configLocal.data_file_directories = new String[]{ String.format("%s/cassandra/data", storageFolder) };
84+
configLocal.saved_caches_directory = String.format("%s/cassandra/saved_caches", storageFolder);
85+
configLocal.commitlog_directory = String.format("%s/cassandra/commitlog", storageFolder);
86+
// CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the segment contains
87+
// mutations for a CDC-enabled table. This should be placed on a separate spindle than the data directories.
88+
// If not set, the default directory is $CASSANDRA_HOME/data/cdc_raw.
89+
configLocal.cdc_raw_directory = String.format("%s/cassandra/cdc", storageFolder);
90+
configLocal.hints_directory = String.format("%s/cassandra/hints", storageFolder);
91+
92+
// ------------------------
93+
// Disk
94+
// ------------------------
95+
configLocal.disk_optimization_strategy = Config.DiskOptimizationStrategy.ssd;
96+
configLocal.disk_failure_policy = DiskFailurePolicy.stop;
97+
configLocal.trickle_fsync = true; // good on SSDs, probably bad on hdd
98+
configLocal.trickle_fsync_interval_in_kb = 10240; // default: 10240
99+
configLocal.concurrent_compactors = 2;
100+
configLocal.compaction_throughput_mb_per_sec = 128; // 128+ for SSD, 16 for hdd;
101+
// - Usually (16 × number_of_drives)
102+
configLocal.concurrent_reads = 32;
103+
// - Writes in Cassandra are rarely I/O bound, so the ideal number of concurrent writes depends on the number
104+
// of CPU cores on the node. The recommended value is 8 × number_of_cpu_cores.
105+
configLocal.concurrent_writes = 16;
106+
// - Counter writes read the current values before incrementing and writing them back.
107+
// The recommended value is (16 × number_of_drives).
108+
configLocal.concurrent_counter_writes = 32;
109+
// - Limit on the number of concurrent materialized view writes. Set this to the lesser of concurrent reads or
110+
// concurrent writes, because there is a read involved in each materialized view write. (Default: 32)
111+
configLocal.concurrent_materialized_view_writes = 32;
112+
113+
// - (Default: 1024 KB/s) Total maximum throttle for replaying failed logged batches. Throttling is reduced proportionally to
114+
// the number of nodes in the cluster.
115+
//configLocal.batchlog_replay_throttle_in_kb = 1024;
116+
117+
// Cache and index settings
118+
// - (Default: 64) Granularity of the index of rows within a partition. For huge rows, decrease this setting to
119+
// improve seek time. If you use key cache, be careful not to make this setting too large because key cache
120+
// will be overwhelmed. If you're unsure of the size of the rows, it's best to use the default setting.
121+
configLocal.column_index_size_in_kb = 32; // default 64
122+
configLocal.column_index_cache_size_in_kb = 1024;
123+
124+
// Memtable (in-memory structures where Cassandra buffers writes)
125+
// https://cassandra.apache.org/doc/latest/architecture/storage_engine.html#memtables
126+
configLocal.memtable_allocation_type = Config.MemtableAllocationType.offheap_objects;
127+
// - Smaller of number of disks or number of cores with a minimum of 2 and a maximum of 8
128+
// If your data directories are backed by SSDs, increase this setting to the number of cores.
129+
configLocal.memtable_flush_writers = 2;
130+
// - The compaction process opens SSTables before they are completely written and uses them in place of
131+
// the prior SSTables for any range previously written. This setting helps to smoothly transfer reads
132+
// between the SSTables by reducing page cache churn and keeps hot rows hot.
133+
configLocal.sstable_preemptive_open_interval_in_mb = 64; // default 50
134+
135+
// Enable / disable CDC functionality on a per-node basis. This modifies the logic used for write path allocation rejection
136+
// (standard: never reject. cdc: reject Mutation containing a CDC-enabled table if at space limit in cdc_raw_directory).
137+
configLocal.cdc_enabled = false;
138+
139+
// The default option is “periodic” where writes may be ack'ed immediately and the CommitLog is simply synced
140+
// every commitlog_sync_period_in_ms milliseconds. This means that writes acknowledged since the last sync can be lost if the node crashes.
141+
configLocal.commitlog_sync = CommitLogSync.periodic;
142+
configLocal.commitlog_sync_period_in_ms = 3000; // sync log every 3s.
143+
configLocal.commitlog_total_space_in_mb = 32;
144+
configLocal.commitlog_segment_size_in_mb = 32;
145+
configLocal.max_mutation_size_in_kb = (configLocal.commitlog_segment_size_in_mb * 1024) / 2;
146+
// configLocal.ideal_consistency_level = ConsistencyLevel.LOCAL_QUORUM; // available in cassandra 4
147+
148+
// ------------------------
149+
// Replication
150+
// ------------------------
151+
152+
// A partitioner determines how data is distributed across the nodes in the cluster (including replicas).
153+
// Basically, a partitioner is a function for deriving a token representing a row from its partition key,
154+
// typically by hashing. Each row of data is then distributed across the cluster by the value of the token.
155+
//
156+
// Default: Murmur3Partitioner, which uses the 64-bit MurmurHash function; the possible
156+
// range of hash values is from -2^63 to +2^63-1.
158+
// WARNING: You cannot change the partitioner in existing clusters that use a different partitioner.
159+
//
160+
configLocal.partitioner = org.apache.cassandra.dht.Murmur3Partitioner.class.getName();
161+
162+
// hinted_handoff is used to optimize cluster consistency process and anti-entropy (the synchronization of
163+
// replica data on nodes to ensure that the data is fresh) when a replica-owning node is not available to accept
164+
// the write operation (i.e. network issues/etc). This operation DOES NOT guarantee successful write operations,
165+
// except when a client application uses consistency level of `ANY`.
166+
// WARNING: This MUST be enabled for HA setups! It's currently disabled, because we're using single-node setups.
167+
configLocal.hinted_handoff_enabled = false;
168+
169+
// Endpoint snitch
170+
// This teaches Cassandra enough about your network's topology so it can route requests efficiently and spread
171+
// replicas by grouping machines into `data centers` and `racks`.
172+
// WARNING: Switching this option CAN CAUSE DATA LOSS (read manual)
173+
// Default: SimpleSnitch (leave it as is for now)
84174
configLocal.endpoint_snitch = "SimpleSnitch";
85175
configLocal.dynamic_snitch_update_interval_in_ms = 100;
86176
configLocal.dynamic_snitch_reset_interval_in_ms = 600000;
87177
configLocal.dynamic_snitch_badness_threshold = 0.1;
88-
configLocal.request_scheduler = "org.apache.cassandra.scheduler.NoScheduler";
89-
configLocal.index_interval = 128;
178+
179+
// Back-pressure settings: if enabled, the coordinator will apply the back-pressure strategy specified below to
180+
// each mutation sent to replicas, with the aim of reducing pressure on overloaded replicas.
181+
// Should be configured usually for cluster setup.
182+
configLocal.back_pressure_enabled = false;
183+
//configLocal.back_pressure_strategy = RateBasedBackPressure.withDefaultParams();
184+
185+
// internode_compression controls whether traffic between nodes is compressed
186+
// all - all traffic is compressed
187+
// dc - traffic between different datacenters is compressed
188+
// none - no compression (suitable for single nodes)
189+
configLocal.internode_compression = Config.InternodeCompression.none;
190+
191+
// Warn for GC pauses longer than 500ms (usually means heap is near limit)
192+
configLocal.gc_warn_threshold_in_ms = 500;
90193

91194
this.config = configLocal;
92195
}
@@ -99,24 +202,27 @@ public Integer getPort()
99202
@Override
100203
public String getCassandraConfigLoaderClassName()
101204
{
102-
return "org.strongbox.db.server.CassandraEmbeddedProperties$CassandraEmbeddedPropertiesLoader";
205+
return CassandraEmbeddedPropertiesLoader.class.getName();
103206
}
104207

105208
public String getStorageFolder()
106209
{
107210
return config.data_file_directories[0].replace("/cassandra/data", "");
108211
}
109212

110-
public static class CassandraEmbeddedPropertiesLoader implements ConfigurationLoader
213+
public static class CassandraEmbeddedPropertiesLoader
214+
implements ConfigurationLoader
111215
{
216+
112217
@Override
113218
public Config loadConfig()
114-
throws ConfigurationException
219+
throws ConfigurationException
115220
{
116221
Objects.nonNull(instance);
117222

118223
return instance.config;
119224
}
120225

121226
}
227+
122228
}

strongbox-db-server/src/main/java/org/strongbox/db/server/EmbeddedJanusGraphWithCassandraServer.java

Lines changed: 53 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,18 @@
22

33
import java.io.IOException;
44
import java.lang.reflect.Method;
5+
import java.util.Collections;
56
import java.util.concurrent.ExecutionException;
67

78
import javax.annotation.PostConstruct;
89
import javax.annotation.PreDestroy;
910

11+
import org.apache.cassandra.db.compaction.SizeTieredCompactionStrategy;
1012
import org.apache.cassandra.service.CassandraDaemon;
1113
import org.apache.cassandra.service.StorageService;
1214
import org.janusgraph.core.JanusGraph;
1315
import org.janusgraph.core.JanusGraphFactory;
16+
import org.janusgraph.diskstorage.idmanagement.ConflictAvoidanceMode;
1417
import org.janusgraph.graphdb.database.StandardJanusGraph;
1518
import org.slf4j.Logger;
1619
import org.slf4j.LoggerFactory;
@@ -71,11 +74,10 @@ private CassandraDaemon provideCassandraInstance()
7174
return cassandraDaemon;
7275
}
7376

74-
System.setProperty("cassandra.config.loader",
75-
cassandraEmbeddedProperties.getCassandraConfigLoaderClassName());
77+
System.setProperty("cassandra.config.loader", cassandraEmbeddedProperties.getCassandraConfigLoaderClassName());
7678

7779
System.setProperty("cassandra-foreground", "true");
78-
System.setProperty("cassandra.native.epoll.enabled", "false");
80+
System.setProperty("cassandra.native.epoll.enabled", "true");
7981
System.setProperty("cassandra.unsafesystem", "true");
8082

8183
CassandraDaemon cassandraDaemonLocal = new CassandraDaemon(true);
@@ -113,21 +115,54 @@ private JanusGraph provideJanusGraphInstance()
113115
return janusGraph;
114116
}
115117

116-
JanusGraph janusGraphLocal = JanusGraphFactory.build()
117-
.set("storage.backend", "cql")
118-
.set("storage.hostname", janusGraphProperties.getStorageHost())
119-
.set("storage.port", janusGraphProperties.getStoragePort())
120-
.set("storage.username",
121-
janusGraphProperties.getStorageUsername())
122-
.set("storage.password",
123-
janusGraphProperties.getStoragePassword())
124-
.set("storage.cql.keyspace", "strongbox")
125-
.set("storage.cql.only-use-local-consistency-for-system-operations",
126-
true)
127-
.set("tx.log-tx", true)
128-
.set("schema.default", "none")
129-
.set("schema.constraints", true)
130-
.open();
118+
JanusGraphFactory.Builder builder = JanusGraphFactory.build();
119+
120+
builder.set("gremlin.graph", org.janusgraph.core.JanusGraphFactory.class.getName())
121+
// Disabling database cache since it might lead to inconsistencies.
122+
.set("cache.db-cache", false)
123+
// This is a percentage of the JVM memory - 0.4 means 40% of -Xmx!
124+
.set("cache.db-cache-size", 0)
125+
.set("cache.db-cache-clean-wait", 0) // can be 0 for local database
126+
.set("cache.db-cache-time", 60000)
127+
.set("cache.tx-cache-size", 50000)
128+
.set("cache.tx-dirty-size", 1000)
129+
.set("ids.block-size", 5000) // default: 10000
130+
.set("ids.num-partitions", 3) // default: 10
131+
.set("ids.renew-timeout", 300000) // default: 120000 ms
132+
.set("ids.renew-percentage", 0.2) // percentage; default: 0.3
133+
.set("ids.authority.conflict-avoidance-mode", ConflictAvoidanceMode.GLOBAL_AUTO)
134+
.set("storage.backend", "cql")
135+
.set("storage.hostname", janusGraphProperties.getStorageHost())
136+
.set("storage.port", janusGraphProperties.getStoragePort())
137+
.set("storage.username", janusGraphProperties.getStorageUsername())
138+
.set("storage.password", janusGraphProperties.getStoragePassword())
139+
// Whether JanusGraph should attempt to parallelize storage operations
140+
.set("storage.parallel-backend-ops", true)
141+
.set("storage.cql.keyspace", "strongbox")
142+
// TODO: Make these configurable from strongbox.yaml start-->
143+
.set("storage.cql.only-use-local-consistency-for-system-operations", true)
144+
.set("storage.cql.local-core-connections-per-host", 1)
145+
.set("storage.cql.local-max-connections-per-host", 10)
146+
.set("storage.cql.read-consistency-level", "ONE")
147+
.set("storage.cql.write-consistency-level", "ONE")
148+
.set("storage.cassandra.replication-factor", 1)
149+
// TODO: Make these configurable from strongbox.yaml <--end
150+
.set("storage.cassandra.compaction-strategy-class", SizeTieredCompactionStrategy.class)
151+
.set("storage.cassandra.compaction-strategy-options", Collections.emptyList())
152+
.set("storage.cassandra.frame-size-mb", 0) // thrift is deprecated, but can't be 0 for cassandra 3!
153+
// disable sstable_compression to prevent additional unnecessary IO (disk space is cheap)
154+
.set("storage.cassandra.compression-type", null)
155+
.set("storage.lock.wait-time", 5) // default 100 (probably ok for cluster, bad for single node)
156+
.set("storage.lock.retries", 20)
157+
.set("tx.log-tx", true)
158+
.set("schema.default", "none")
159+
.set("schema.constraints", true)
160+
// enable metrics
161+
.set("metrics.enabled", true)
162+
.set("metrics.jmx.enabled", true)
163+
;
164+
165+
JanusGraph janusGraphLocal = builder.open();
131166

132167
try
133168
{

0 commit comments

Comments
 (0)