Skip to content

Commit 1255c85

Browse files
committed
Improve RoutingNode invariant assertion efficiency
[BalanceUnbalancedClusterTests take 10 or 11 minutes][1] in CI. The reason is because of O(N^2) assertion behavior. This change moves the invariant assertions out of the individual muting calls up to the RoutingNodes level where it can do the assertion after bulk mutations. This change is safe because assertions are only moved out of package-private methods that are only called by RoutingNodes; the public API of RoutingNode still enforces all the same invariants. [1]: https://build.ci.opensearch.org/job/gradle-check/74280/testReport/org.opensearch.cluster.routing.allocation/BalanceUnbalancedClusterTests/ Signed-off-by: Andrew Ross <andrross@amazon.com>
1 parent 7410452 commit 1255c85

File tree

2 files changed

+6
-7
lines changed

2 files changed

+6
-7
lines changed

server/src/main/java/org/opensearch/cluster/routing/RoutingNode.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,6 @@ public Collection<ShardRouting> getInitializingShards() {
269269
* @param shard Shard to create on this Node
270270
*/
271271
void add(ShardRouting shard) {
272-
assert invariant();
273272
if (shards.put(shard) != null) {
274273
throw new IllegalStateException(
275274
"Trying to add a shard "
@@ -290,11 +289,9 @@ void add(ShardRouting shard) {
290289
relocatingShardsBucket.add(shard);
291290
}
292291
shardsByIndex.computeIfAbsent(shard.index(), k -> new LinkedHashSet<>()).add(shard);
293-
assert invariant();
294292
}
295293

296294
void update(ShardRouting oldShard, ShardRouting newShard) {
297-
assert invariant();
298295
if (shards.containsKey(oldShard.shardId()) == false) {
299296
// Shard was already removed by routing nodes iterator
300297
// TODO: change caller logic in RoutingNodes so that this check can go away
@@ -320,11 +317,9 @@ void update(ShardRouting oldShard, ShardRouting newShard) {
320317
relocatingShardsBucket.add(newShard);
321318
}
322319
shardsByIndex.computeIfAbsent(newShard.index(), k -> new LinkedHashSet<>()).add(newShard);
323-
assert invariant();
324320
}
325321

326322
void remove(ShardRouting shard) {
327-
assert invariant();
328323
ShardRouting previousValue = shards.remove(shard.shardId());
329324
assert previousValue == shard : "expected shard " + previousValue + " but was " + shard;
330325
if (shard.initializing()) {
@@ -338,7 +333,6 @@ void remove(ShardRouting shard) {
338333
if (shardsByIndex.get(shard.index()).isEmpty()) {
339334
shardsByIndex.remove(shard.index());
340335
}
341-
assert invariant();
342336
}
343337

344338
/**
@@ -502,7 +496,7 @@ public boolean isEmpty() {
502496
return shards.isEmpty();
503497
}
504498

505-
private boolean invariant() {
499+
boolean invariant() {
506500

507501
// initializingShards must consistent with that in shards
508502
Collection<ShardRouting> shardRoutingsInitializing = StreamSupport.stream(shards.spliterator(), false)

server/src/main/java/org/opensearch/cluster/routing/RoutingNodes.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ public RoutingNodes(ClusterState clusterState, boolean readOnly) {
174174
}
175175
}
176176
}
177+
assert nodesToShards.values().stream().allMatch(RoutingNode::invariant);
177178
}
178179

179180
private void addRecovery(ShardRouting routing) {
@@ -563,6 +564,7 @@ public ShardRouting initializeShard(
563564
assert unassignedShard.unassigned() : "expected an unassigned shard " + unassignedShard;
564565
ShardRouting initializedShard = unassignedShard.initialize(nodeId, existingAllocationId, expectedSize);
565566
node(nodeId).add(initializedShard);
567+
assert node(nodeId).invariant();
566568
inactiveShardCount++;
567569
if (initializedShard.primary()) {
568570
inactivePrimaryCount++;
@@ -591,6 +593,7 @@ public Tuple<ShardRouting, ShardRouting> relocateShard(
591593
ShardRouting target = source.getTargetRelocatingShard();
592594
updateAssigned(startedShard, source);
593595
node(target.currentNodeId()).add(target);
596+
assert node(target.currentNodeId()).invariant();
594597
assignedShardsAdd(target);
595598
addRecovery(target);
596599
changes.relocationStarted(startedShard, target);
@@ -872,6 +875,7 @@ private ShardRouting promoteActiveReplicaShardToPrimary(ShardRouting replicaShar
872875
private void remove(ShardRouting shard) {
873876
assert shard.unassigned() == false : "only assigned shards can be removed here (" + shard + ")";
874877
node(shard.currentNodeId()).remove(shard);
878+
assert node(shard.currentNodeId()).invariant();
875879
if (shard.initializing() && shard.relocatingNodeId() == null) {
876880
inactiveShardCount--;
877881
assert inactiveShardCount >= 0;
@@ -951,6 +955,7 @@ private void updateAssigned(ShardRouting oldShard, ShardRouting newShard) {
951955
+ " by shard assigned to same node but was "
952956
+ newShard;
953957
node(oldShard.currentNodeId()).update(oldShard, newShard);
958+
assert node(oldShard.currentNodeId()).invariant();
954959
List<ShardRouting> shardsWithMatchingShardId = assignedShards.computeIfAbsent(oldShard.shardId(), k -> new ArrayList<>());
955960
int previousShardIndex = shardsWithMatchingShardId.indexOf(oldShard);
956961
assert previousShardIndex >= 0 : "shard to update " + oldShard + " does not exist in list of assigned shards";

0 commit comments

Comments
 (0)