
Commit 9339476

feat: improve default configuration for reliability and performance (#696)
* feat: improve default configuration
* fix: add missing labels to hdfs-operator-clusterrole-nodes
* update changelog
* explain properties
* move property to core-site.xml
* update changelog
1 parent 2bba315 commit 9339476

3 files changed: +43 -2 lines changed

CHANGELOG.md (2 additions, 0 deletions)

```diff
@@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file.
 - Use `--console-log-format` (or `CONSOLE_LOG_FORMAT`) to set the format to `plain` (default) or `json`.
 - The operator now defaults to `AES/CTR/NoPadding` for `dfs.encrypt.data.transfer.cipher.suite` to improve security and performance ([#693]).
 - The built-in Prometheus servlet is now enabled and metrics are exposed under the `/prom` path of all UI services ([#695]).
+- Added several properties to `hdfs-site.xml` and `core-site.xml` that improve general performance and reliability ([#696])
 
 ### Changed
 
@@ -50,6 +51,7 @@ All notable changes to this project will be documented in this file.
 [#684]: https://github.com/stackabletech/hdfs-operator/pull/684
 [#693]: https://github.com/stackabletech/hdfs-operator/pull/693
 [#695]: https://github.com/stackabletech/hdfs-operator/pull/695
+[#696]: https://github.com/stackabletech/hdfs-operator/pull/696
 
 ## [25.3.0] - 2025-03-21
 
```

deploy/helm/hdfs-operator/templates/roles.yaml (2 additions, 0 deletions)

```diff
@@ -186,6 +186,8 @@ apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
   name: {{ include "operator.name" . }}-clusterrole-nodes
+  labels:
+    {{- include "operator.labels" . | nindent 4 }}
 rules:
   - apiGroups:
       - ""
```

rust/operator-binary/src/hdfs_controller.rs (39 additions, 2 deletions)

```diff
@@ -664,7 +664,41 @@ fn rolegroup_config_map(
         )
         .add("dfs.datanode.registered.hostname", "${env.POD_ADDRESS}")
         .add("dfs.datanode.registered.port", "${env.DATA_PORT}")
-        .add("dfs.datanode.registered.ipc.port", "${env.IPC_PORT}");
+        .add("dfs.datanode.registered.ipc.port", "${env.IPC_PORT}")
+        // The following two properties are set to "true" because there is a minor chance that data
+        // written to HDFS is not synced to disk even if a block has been closed.
+        // Users in HBase can control this explicitly for the WAL, but for flushes and compactions
+        // I believe they can't as easily (if at all).
+        // In theory, HBase should be able to recover from these failures, but that comes at a cost
+        // and there's always a risk.
+        // Enabling this behavior causes HDFS to sync to disk as soon as possible.
+        .add("dfs.datanode.sync.behind.writes", "true")
+        .add("dfs.datanode.synconclose", "true")
+        // Defaults to 10 since at least 2011.
+        // This controls the number of concurrent client connections (including DataNodes)
+        // to the NameNode. Ideally, we'd scale this with the number of DataNodes, but that
+        // would lead to restarts of the NameNode.
+        // Increasing it to 50 should lead to better performance due to more concurrency.
+        .add("dfs.namenode.handler.count", "50")
+        // Defaults to 10 since at least 2012.
+        // This controls the number of concurrent client connections to the DataNodes.
+        // We have no idea how many clients there may be, so it's hard to assign a good default.
+        // Increasing it to 50 should lead to better performance due to more concurrency,
+        // especially with use-cases like HBase.
+        .add("dfs.datanode.handler.count", "50")
+        // The following two properties default to 2 and 4 respectively since around 2013.
+        // They control the maximum number of replication "jobs" a NameNode assigns to
+        // a DataNode in a single heartbeat.
+        // Increasing this number increases network usage during replication events
+        // but can lead to faster recovery.
+        .add("dfs.namenode.replication.max-streams", "4")
+        .add("dfs.namenode.replication.max-streams-hard-limit", "8")
+        // Defaults to 4096 and hasn't changed since at least 2011.
+        // This is the number of threads used for actual data transfer, so it's not very
+        // CPU-heavy but IO-bound, which is why the number is relatively high.
+        // Today's Java and IO should be able to handle more, so bump it to 8192 for
+        // better performance/concurrency.
+        .add("dfs.datanode.max.transfer.threads", "8192");
     if hdfs.has_https_enabled() {
         hdfs_site.add("dfs.datanode.registered.https.port", "${env.HTTPS_PORT}");
     } else {
```
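The hunk above extends the `hdfs-site.xml` settings that the operator writes into each rolegroup ConfigMap. As a rough illustration only (not part of this commit), the new defaults would surface in the rendered file along the lines of the following Hadoop-style XML fragment; the property names and values are taken from the diff, while the file layout and the omission of all other generated properties are assumptions:

```xml
<?xml version="1.0"?>
<!-- Illustrative sketch: how the new hdfs-site.xml defaults from this commit could
     look once rendered. Only the properties added in this commit are shown. -->
<configuration>
  <property>
    <name>dfs.datanode.sync.behind.writes</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.datanode.synconclose</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>50</value>
  </property>
  <property>
    <name>dfs.datanode.handler.count</name>
    <value>50</value>
  </property>
  <property>
    <name>dfs.namenode.replication.max-streams</name>
    <value>4</value>
  </property>
  <property>
    <name>dfs.namenode.replication.max-streams-hard-limit</name>
    <value>8</value>
  </property>
  <property>
    <name>dfs.datanode.max.transfer.threads</name>
    <value>8192</value>
  </property>
</configuration>
```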
```diff
@@ -683,7 +717,10 @@ fn rolegroup_config_map(
         .ha_zookeeper_quorum()
         .security_config(hdfs, cluster_info)
         .context(BuildSecurityConfigSnafu)?
-        .enable_prometheus_endpoint();
+        .enable_prometheus_endpoint()
+        // The default (4096) hasn't changed since 2009.
+        // Increase to 128k to allow for faster transfers.
+        .add("io.file.buffer.size", "131072");
     if let Some(hdfs_opa_config) = hdfs_opa_config {
         hdfs_opa_config.add_core_site_config(&mut core_site);
     }
```
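The second hunk adds `io.file.buffer.size` to the generated `core-site.xml` (131072 bytes = 128 KiB, matching the code comment). A minimal sketch of the corresponding fragment, again assuming the standard Hadoop XML configuration layout:

```xml
<?xml version="1.0"?>
<!-- Illustrative sketch: io.file.buffer.size (128 KiB) as it would appear in core-site.xml. -->
<configuration>
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
</configuration>
```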
