Skip to content

Commit c9bb764

Browse files
authored
GEODE-9060: Remove the member from a copy of replicates as GII candid… (#6246) (#7230)
* GEODE-9060: Remove the member from a copy of replicates as GII candidate if it's not part of the same distributed system, but leave original replicates unchanged. (cherry picked from commit 76a5afd)
1 parent 05c6259 commit c9bb764

2 files changed

Lines changed: 48 additions & 4 deletions

File tree

geode-core/src/distributedTest/java/org/apache/geode/internal/cache/persistence/PersistentRecoveryOrderDUnitTest.java

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -876,11 +876,43 @@ public void testSplitBrain() {
876876
Throwable thrown = catchThrowable(() -> {
877877
createReplicateRegion(regionName, getDiskDirs(getVMId()));
878878
});
879-
assertThat(thrown).isInstanceOf(ConflictingPersistentDataException.class);
879+
assertThat(thrown)
880+
.isInstanceOf(ConflictingPersistentDataException.class)
881+
.hasMessageContaining("was not part of the same distributed system as the local data");
880882
}
881883
});
882884
}
883885

886+
@Test
887+
public void testRecoverableSplitBrain() {
888+
vm2.invoke(() -> {
889+
createReplicateRegion(regionName, getDiskDirs(getVMId()));
890+
});
891+
vm0.invoke(() -> {
892+
createReplicateRegion(regionName, getDiskDirs(getVMId()));
893+
putEntry("A", "B");
894+
getCache().getRegion(regionName).close();
895+
});
896+
897+
vm1.invoke(() -> {
898+
createReplicateRegion(regionName, getDiskDirs(getVMId()));
899+
validateEntry("A", "B");
900+
updateEntry("A", "C");
901+
getCache().getRegion(regionName).close();
902+
});
903+
904+
// VM0 doesn't know that VM1 ever existed so it will start up.
905+
vm0.invoke(() -> {
906+
createReplicateRegion(regionName, getDiskDirs(getVMId()));
907+
validateEntry("A", "C");
908+
});
909+
910+
vm1.invoke(() -> {
911+
createReplicateRegion(regionName, getDiskDirs(getVMId()));
912+
validateEntry("A", "C");
913+
});
914+
}
915+
884916
/**
885917
* Test to make sure that if a member crashes while a GII is in progress, we wait for the
886918
* member to come back for starting.

geode-core/src/main/java/org/apache/geode/internal/cache/persistence/PersistenceAdvisorImpl.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -510,18 +510,18 @@ public Set<PersistentMemberID> getPersistedMembers() {
510510
public boolean checkMyStateOnMembers(Set<InternalDistributedMember> replicates)
511511
throws ReplyException {
512512
PersistentStateQueryResults remoteStates = getMyStateOnMembers(replicates);
513+
Set<InternalDistributedMember> copyOfReplicates = null;
513514

514515
persistenceAdvisorObserver.observe(regionPath);
515516

516517
boolean equal = false;
518+
PersistentMemberID myId = getPersistentID();
517519
for (Map.Entry<InternalDistributedMember, PersistentMemberState> entry : remoteStates
518520
.getStateOnPeers().entrySet()) {
519521
InternalDistributedMember member = entry.getKey();
520522
PersistentMemberID remoteId = remoteStates.getPersistentIds().get(member);
521523

522-
PersistentMemberID myId = getPersistentID();
523524
PersistentMemberState stateOnPeer = entry.getValue();
524-
525525
if (PersistentMemberState.REVOKED.equals(stateOnPeer)) {
526526
throw new RevokedPersistentDataException(
527527
String.format(
@@ -533,7 +533,19 @@ public boolean checkMyStateOnMembers(Set<InternalDistributedMember> replicates)
533533
String message = String.format(
534534
"Region %s remote member %s with persistent data %s was not part of the same distributed system as the local data from %s",
535535
regionPath, member, remoteId, myId);
536-
throw new ConflictingPersistentDataException(message);
536+
// Conceptually, a member removed because it does not know the current member should be equal to
537+
// existing replicates.
538+
// It can still be used as a GII provider candidate. Use copyOfReplicates to avoid modifying
539+
// the replicates.
540+
if (copyOfReplicates == null) {
541+
copyOfReplicates = new HashSet<>(replicates);
542+
}
543+
copyOfReplicates.remove(member);
544+
if (copyOfReplicates.isEmpty()) {
545+
throw new ConflictingPersistentDataException(message);
546+
} else {
547+
logger.info(message);
548+
}
537549
}
538550

539551
if (myId != null && stateOnPeer == PersistentMemberState.EQUAL) {

0 commit comments

Comments
 (0)