Skip to content

Commit b992b67

Browse files
himshikha (Himshikha Gupta)
authored and committed
remote publication checksum stats (opensearch-project#15957)
* Remote publication checksum stats

Signed-off-by: Himshikha Gupta <himshikh@amazon.com>
Co-authored-by: Himshikha Gupta <himshikh@amazon.com>
1 parent b017a31 commit b992b67

File tree

5 files changed

+91
-4
lines changed

5 files changed

+91
-4
lines changed

server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteStatePublicationIT.java

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,7 @@
5656
import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING;
5757
import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_PUBLICATION_SETTING_KEY;
5858
import static org.opensearch.gateway.remote.RemoteClusterStateUtils.DELIMITER;
59+
import static org.opensearch.gateway.remote.RemoteDownloadStats.CHECKSUM_VALIDATION_FAILED_COUNT;
5960
import static org.opensearch.gateway.remote.model.RemoteClusterBlocks.CLUSTER_BLOCKS;
6061
import static org.opensearch.gateway.remote.model.RemoteCoordinationMetadata.COORDINATION_METADATA;
6162
import static org.opensearch.gateway.remote.model.RemoteCustomMetadata.CUSTOM_METADATA;
@@ -405,10 +406,28 @@ private void assertDataNodeDownloadStats(NodesStatsResponse nodesStatsResponse)
405406
assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getSuccessCount() > 0);
406407
assertEquals(0, dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getFailedCount());
407408
assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(0).getTotalTimeInMillis() > 0);
409+
assertEquals(
410+
0,
411+
dataNodeDiscoveryStats.getClusterStateStats()
412+
.getPersistenceStats()
413+
.get(0)
414+
.getExtendedFields()
415+
.get(CHECKSUM_VALIDATION_FAILED_COUNT)
416+
.get()
417+
);
408418

409419
assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getSuccessCount() > 0);
410420
assertEquals(0, dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getFailedCount());
411421
assertTrue(dataNodeDiscoveryStats.getClusterStateStats().getPersistenceStats().get(1).getTotalTimeInMillis() > 0);
422+
assertEquals(
423+
0,
424+
dataNodeDiscoveryStats.getClusterStateStats()
425+
.getPersistenceStats()
426+
.get(1)
427+
.getExtendedFields()
428+
.get(CHECKSUM_VALIDATION_FAILED_COUNT)
429+
.get()
430+
);
412431
}
413432

414433
private Map<String, Integer> getMetadataFiles(BlobStoreRepository repository, String subDirectory) throws IOException {

server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1644,6 +1644,12 @@ void validateClusterStateFromChecksum(
16441644
failedValidation
16451645
)
16461646
);
1647+
if (isFullStateDownload) {
1648+
remoteStateStats.stateFullDownloadValidationFailed();
1649+
} else {
1650+
remoteStateStats.stateDiffDownloadValidationFailed();
1651+
}
1652+
16471653
if (isFullStateDownload && remoteClusterStateValidationMode.equals(RemoteClusterStateValidationMode.FAILURE)) {
16481654
throw new IllegalStateException(
16491655
"Cluster state checksums do not match during full state read. Validation failed for " + failedValidation

server/src/main/java/org/opensearch/gateway/remote/RemoteDownloadStats.java

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.gateway.remote;
10+
11+
import org.opensearch.cluster.coordination.PersistedStateStats;
12+
13+
import java.util.concurrent.atomic.AtomicLong;
14+
15+
/**
16+
* Download stats for remote state
17+
*
18+
* @opensearch.internal
19+
*/
20+
public class RemoteDownloadStats extends PersistedStateStats {
21+
static final String CHECKSUM_VALIDATION_FAILED_COUNT = "checksum_validation_failed_count";
22+
private AtomicLong checksumValidationFailedCount = new AtomicLong(0);
23+
24+
public RemoteDownloadStats(String statsName) {
25+
super(statsName);
26+
addToExtendedFields(CHECKSUM_VALIDATION_FAILED_COUNT, checksumValidationFailedCount);
27+
}
28+
29+
public void checksumValidationFailedCount() {
30+
checksumValidationFailedCount.incrementAndGet();
31+
}
32+
33+
public long getChecksumValidationFailedCount() {
34+
return checksumValidationFailedCount.get();
35+
}
36+
}

server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18,16 +18,16 @@
1818
public class RemotePersistenceStats {
1919

2020
RemoteUploadStats remoteUploadStats;
21-
PersistedStateStats remoteDiffDownloadStats;
22-
PersistedStateStats remoteFullDownloadStats;
21+
RemoteDownloadStats remoteDiffDownloadStats;
22+
RemoteDownloadStats remoteFullDownloadStats;
2323

2424
final String FULL_DOWNLOAD_STATS = "remote_full_download";
2525
final String DIFF_DOWNLOAD_STATS = "remote_diff_download";
2626

2727
public RemotePersistenceStats() {
2828
remoteUploadStats = new RemoteUploadStats();
29-
remoteDiffDownloadStats = new PersistedStateStats(DIFF_DOWNLOAD_STATS);
30-
remoteFullDownloadStats = new PersistedStateStats(FULL_DOWNLOAD_STATS);
29+
remoteDiffDownloadStats = new RemoteDownloadStats(DIFF_DOWNLOAD_STATS);
30+
remoteFullDownloadStats = new RemoteDownloadStats(FULL_DOWNLOAD_STATS);
3131
}
3232

3333
public void cleanUpAttemptFailed() {
@@ -90,6 +90,22 @@ public void stateDiffDownloadFailed() {
9090
remoteDiffDownloadStats.stateFailed();
9191
}
9292

93+
public void stateDiffDownloadValidationFailed() {
94+
remoteDiffDownloadStats.checksumValidationFailedCount();
95+
}
96+
97+
public void stateFullDownloadValidationFailed() {
98+
remoteFullDownloadStats.checksumValidationFailedCount();
99+
}
100+
101+
public long getStateDiffDownloadValidationFailed() {
102+
return remoteDiffDownloadStats.getChecksumValidationFailedCount();
103+
}
104+
105+
public long getStateFullDownloadValidationFailed() {
106+
return remoteFullDownloadStats.getChecksumValidationFailedCount();
107+
}
108+
93109
public PersistedStateStats getUploadStats() {
94110
return remoteUploadStats;
95111
}

server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3342,6 +3342,7 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabledWithNullC
33423342
anyString(),
33433343
anyBoolean()
33443344
);
3345+
assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed());
33453346
}
33463347

33473348
public void testGetClusterStateForManifestWithChecksumValidationEnabled() throws IOException {
@@ -3374,6 +3375,7 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabled() throws
33743375
);
33753376
mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true);
33763377
verify(mockService, times(1)).validateClusterStateFromChecksum(manifest, clusterState, ClusterName.DEFAULT.value(), NODE_ID, true);
3378+
assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed());
33773379
}
33783380

33793381
public void testGetClusterStateForManifestWithChecksumValidationModeNone() throws IOException {
@@ -3406,6 +3408,7 @@ public void testGetClusterStateForManifestWithChecksumValidationModeNone() throw
34063408
);
34073409
mockService.getClusterStateForManifest(ClusterName.DEFAULT.value(), manifest, NODE_ID, true);
34083410
verify(mockService, times(0)).validateClusterStateFromChecksum(any(), any(), anyString(), anyString(), anyBoolean());
3411+
assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed());
34093412
}
34103413

34113414
public void testGetClusterStateForManifestWithChecksumValidationEnabledWithMismatch() throws IOException {
@@ -3448,6 +3451,7 @@ public void testGetClusterStateForManifestWithChecksumValidationEnabledWithMisma
34483451
NODE_ID,
34493452
true
34503453
);
3454+
assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed());
34513455
}
34523456

34533457
public void testGetClusterStateForManifestWithChecksumValidationDebugWithMismatch() throws IOException {
@@ -3494,6 +3498,7 @@ public void testGetClusterStateForManifestWithChecksumValidationDebugWithMismatc
34943498
NODE_ID,
34953499
true
34963500
);
3501+
assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateFullDownloadValidationFailed());
34973502
}
34983503

34993504
public void testGetClusterStateUsingDiffWithChecksum() throws IOException {
@@ -3535,6 +3540,7 @@ public void testGetClusterStateUsingDiffWithChecksum() throws IOException {
35353540
eq(NODE_ID),
35363541
eq(false)
35373542
);
3543+
assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed());
35383544
}
35393545

35403546
public void testGetClusterStateUsingDiffWithChecksumModeNone() throws IOException {
@@ -3576,6 +3582,7 @@ public void testGetClusterStateUsingDiffWithChecksumModeNone() throws IOExceptio
35763582
eq(NODE_ID),
35773583
eq(false)
35783584
);
3585+
assertEquals(0, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed());
35793586
}
35803587

35813588
public void testGetClusterStateUsingDiffWithChecksumModeDebugMismatch() throws IOException {
@@ -3616,6 +3623,7 @@ public void testGetClusterStateUsingDiffWithChecksumModeDebugMismatch() throws I
36163623
eq(NODE_ID),
36173624
eq(false)
36183625
);
3626+
assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed());
36193627
}
36203628

36213629
public void testGetClusterStateUsingDiffWithChecksumModeTraceMismatch() throws IOException {
@@ -3677,6 +3685,7 @@ public void testGetClusterStateUsingDiffWithChecksumModeTraceMismatch() throws I
36773685
eq(NODE_ID),
36783686
eq(false)
36793687
);
3688+
assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed());
36803689
}
36813690

36823691
public void testGetClusterStateUsingDiffWithChecksumMismatch() throws IOException {
@@ -3738,6 +3747,7 @@ public void testGetClusterStateUsingDiffWithChecksumMismatch() throws IOExceptio
37383747
eq(NODE_ID),
37393748
eq(false)
37403749
);
3750+
assertEquals(1, remoteClusterStateService.getRemoteStateStats().getStateDiffDownloadValidationFailed());
37413751
}
37423752

37433753
private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> clusterUUIDsPointers) throws IOException {

0 commit comments

Comments
 (0)