Skip to content

Commit d9096b2

Browse files
committed
draft PR to fix cluster not able to spin up issue when disk usage exceeds threshold
Signed-off-by: zane-neo <zaniu@amazon.com>
1 parent 01acf1c commit d9096b2

File tree

3 files changed

+44
-9
lines changed

3 files changed

+44
-9
lines changed

server/src/main/java/org/opensearch/cluster/InternalClusterInfoService.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -155,13 +155,17 @@ void setUpdateFrequency(TimeValue updateFrequency) {
155155

156156
@Override
157157
public void clusterChanged(ClusterChangedEvent event) {
158-
if (event.localNodeClusterManager() && refreshAndRescheduleRunnable.get() == null) {
159-
logger.trace("elected as cluster-manager, scheduling cluster info update tasks");
160-
executeRefresh(event.state(), "became cluster-manager");
161-
162-
final RefreshAndRescheduleRunnable newRunnable = new RefreshAndRescheduleRunnable();
163-
refreshAndRescheduleRunnable.set(newRunnable);
164-
threadPool.scheduleUnlessShuttingDown(updateFrequency, REFRESH_EXECUTOR, newRunnable);
158+
if (event.localNodeClusterManager()) {
159+
if (!event.state().blocks().indices().isEmpty()) {
160+
executeRefreshImmediately(event.state());
161+
} else if (refreshAndRescheduleRunnable.get() == null) {
162+
logger.trace("elected as cluster-manager, scheduling cluster info update tasks");
163+
executeRefresh(event.state(), "became cluster-manager");
164+
165+
final RefreshAndRescheduleRunnable newRunnable = new RefreshAndRescheduleRunnable();
166+
refreshAndRescheduleRunnable.set(newRunnable);
167+
threadPool.scheduleUnlessShuttingDown(updateFrequency, REFRESH_EXECUTOR, newRunnable);
168+
}
165169
} else if (event.localNodeClusterManager() == false) {
166170
refreshAndRescheduleRunnable.set(null);
167171
return;
@@ -204,6 +208,14 @@ private void executeRefresh(ClusterState clusterState, String reason) {
204208
}
205209
}
206210

211+
private void executeRefreshImmediately(ClusterState clusterState) {
212+
String reason = "became cluster-manager with indices blocked";
213+
if (!clusterState.nodes().getDataNodes().isEmpty()) {
214+
logger.trace("refreshing cluster info immediately [{}]", reason);
215+
new RefreshRunnable(reason).run();
216+
}
217+
}
218+
207219
@Override
208220
public ClusterInfo getClusterInfo() {
209221
final IndicesStatsSummary indicesStatsSummary = this.indicesStatsSummary; // single volatile read

server/src/main/java/org/opensearch/cluster/block/ClusterBlocks.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,9 +67,9 @@ public class ClusterBlocks extends AbstractDiffable<ClusterBlocks> {
6767

6868
private final Set<ClusterBlock> global;
6969

70-
private final Map<String, Set<ClusterBlock>> indicesBlocks;
70+
private Map<String, Set<ClusterBlock>> indicesBlocks;
7171

72-
private final EnumMap<ClusterBlockLevel, ImmutableLevelHolder> levelHolders;
72+
private EnumMap<ClusterBlockLevel, ImmutableLevelHolder> levelHolders;
7373

7474
ClusterBlocks(Set<ClusterBlock> global, final Map<String, Set<ClusterBlock>> indicesBlocks) {
7575
this.global = global;
@@ -161,6 +161,24 @@ public boolean hasIndexBlock(String index, ClusterBlock block) {
161161
return indicesBlocks.containsKey(index) && indicesBlocks.get(index).contains(block);
162162
}
163163

164+
public void removeIndexBlock(String index, ClusterBlock block) {
165+
Map<String, Set<ClusterBlock>> newIndicesBlocks = new HashMap<>(indicesBlocks); // copy to avoid UnsupportedOperationException>
166+
for (Map.Entry<String, Set<ClusterBlock>> entry : indicesBlocks.entrySet()) {
167+
String indexName = entry.getKey();
168+
Set<ClusterBlock> clusterBlockSet = new HashSet<>(entry.getValue());
169+
if (indexName.equals(index)) {
170+
clusterBlockSet.remove(block);
171+
if (clusterBlockSet.isEmpty()) {
172+
newIndicesBlocks.remove(indexName);
173+
} else {
174+
newIndicesBlocks.put(indexName, Collections.unmodifiableSet(clusterBlockSet));
175+
}
176+
}
177+
}
178+
this.indicesBlocks = Collections.unmodifiableMap(newIndicesBlocks);
179+
this.levelHolders = generateLevelHolders(global, indicesBlocks);
180+
}
181+
164182
public boolean hasIndexBlockWithId(String index, int blockId) {
165183
final Set<ClusterBlock> clusterBlocks = indicesBlocks.get(index);
166184
if (clusterBlocks != null) {

server/src/main/java/org/opensearch/cluster/routing/allocation/DiskThresholdMonitor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@
4040
import org.opensearch.cluster.ClusterInfo;
4141
import org.opensearch.cluster.ClusterState;
4242
import org.opensearch.cluster.DiskUsage;
43+
import org.opensearch.cluster.block.ClusterBlock;
4344
import org.opensearch.cluster.block.ClusterBlockLevel;
45+
import org.opensearch.cluster.block.ClusterBlocks;
4446
import org.opensearch.cluster.metadata.IndexMetadata;
4547
import org.opensearch.cluster.metadata.Metadata;
4648
import org.opensearch.cluster.routing.RerouteService;
@@ -253,6 +255,9 @@ public void onNewInfo(ClusterInfo info) {
253255
}
254256

255257
} else {
258+
for (Map.Entry<String, Set<ClusterBlock>> indexBlockEntry : state.blocks().indices().entrySet()) {
259+
state.blocks().removeIndexBlock(indexBlockEntry.getKey(), IndexMetadata.INDEX_READ_ONLY_ALLOW_DELETE_BLOCK);
260+
}
256261

257262
nodesOverHighThresholdAndRelocating.remove(node);
258263

0 commit comments

Comments
 (0)