From e7794c14c872755824225940dd2fafd70b606bda Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 30 Jun 2025 11:03:54 +1000 Subject: [PATCH 01/65] Track snapshot stats as metrics --- .../repositories/azure/AzureRepository.java | 7 +- .../azure/AzureRepositoryPlugin.java | 5 +- .../azure/AzureRepositorySettingsTests.java | 4 +- ...eCloudStorageBlobStoreRepositoryTests.java | 5 +- .../gcs/GoogleCloudStoragePlugin.java | 5 +- .../gcs/GoogleCloudStorageRepository.java | 7 +- .../gcs/GoogleCloudStoragePluginTests.java | 4 +- .../s3/S3RepositoryThirdPartyTests.java | 4 +- .../s3/S3BlobStoreRepositoryTests.java | 7 +- .../repositories/s3/S3Repository.java | 7 +- .../repositories/s3/S3RepositoryPlugin.java | 12 ++- .../repositories/s3/S3RepositoryTests.java | 4 +- .../repository/url/URLRepositoryPlugin.java | 2 +- .../repositories/url/URLRepository.java | 12 ++- .../repositories/hdfs/HdfsPlugin.java | 2 +- .../repositories/hdfs/HdfsRepository.java | 12 ++- .../plan/ShardSnapshotsServiceIT.java | 2 +- .../repositories/InvalidRepositoryIT.java | 2 +- ...BlobStoreRepositoryOperationPurposeIT.java | 2 +- ...etadataLoadingDuringSnapshotRestoreIT.java | 2 +- .../SnapshotsServiceDoubleFinalizationIT.java | 2 +- .../repositories/RepositoriesModule.java | 5 +- .../repositories/RepositoriesService.java | 53 +++++++++-- .../repositories/Repository.java | 18 +++- .../repositories/SnapshotMetrics.java | 69 +++++++++++++++ .../repositories/SnapshotShardContext.java | 13 ++- .../blobstore/BlobStoreRepository.java | 46 +++++++--- .../blobstore/BlobStoreSnapshotMetrics.java | 88 +++++++++++++++++++ .../blobstore/MeteredBlobStoreRepository.java | 6 +- .../repositories/fs/FsRepository.java | 12 ++- .../ReservedRepositoryActionTests.java | 3 +- .../RepositoriesServiceTests.java | 16 ++-- ...bStoreRepositoryDeleteThrottlingTests.java | 2 +- .../snapshots/SnapshotResiliencyTests.java | 2 +- ...ncySimulatingBlobStoreRepositoryTests.java | 2 +- .../LatencySimulatingRepositoryPlugin.java | 2 
+- .../snapshots/mockstore/MockRepository.java | 2 +- .../java/org/elasticsearch/xpack/ccr/Ccr.java | 2 +- .../SourceOnlySnapshotRepository.java | 18 +++- .../lucene/bwc/AbstractArchiveTestCase.java | 2 +- ...chableSnapshotDiskThresholdIntegTests.java | 2 +- ...archableSnapshotsPrewarmingIntegTests.java | 2 +- ...SnapshotRecoveryStateIntegrationTests.java | 2 +- .../xpack/slm/SLMHealthBlockedSnapshotIT.java | 2 +- .../xpack/slm/SLMStatDisruptionIT.java | 4 +- .../SnapshotBasedIndexRecoveryIT.java | 6 +- .../analyze/RepositoryAnalysisFailureIT.java | 5 +- .../analyze/RepositoryAnalysisSuccessIT.java | 5 +- .../votingonly/VotingOnlyNodePluginTests.java | 2 +- 49 files changed, 405 insertions(+), 95 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java create mode 100644 server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepository.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepository.java index dceafe753e60e..ed0de60dbd2e9 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepository.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepository.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.RepositoriesMetrics; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.MeteredBlobStoreRepository; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -118,7 +119,8 @@ public AzureRepository( final ClusterService clusterService, final BigArrays bigArrays, final RecoverySettings recoverySettings, - final RepositoriesMetrics repositoriesMetrics + final RepositoriesMetrics repositoriesMetrics, 
+ final SnapshotMetrics snapshotMetrics ) { super( projectId, @@ -128,7 +130,8 @@ public AzureRepository( bigArrays, recoverySettings, buildBasePath(metadata), - buildLocation(metadata) + buildLocation(metadata), + snapshotMetrics ); this.chunkSize = Repository.CHUNK_SIZE_SETTING.get(metadata.settings()); this.storageService = storageService; diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepositoryPlugin.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepositoryPlugin.java index 62d45cb399bfc..17c6ebcf18095 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepositoryPlugin.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepositoryPlugin.java @@ -62,7 +62,7 @@ public Map getRepositories( RecoverySettings recoverySettings, RepositoriesMetrics repositoriesMetrics ) { - return Collections.singletonMap(AzureRepository.TYPE, (projectId, metadata) -> { + return Collections.singletonMap(AzureRepository.TYPE, (projectId, metadata, snapshotMetrics) -> { AzureStorageService storageService = azureStoreService.get(); assert storageService != null; return new AzureRepository( @@ -73,7 +73,8 @@ public Map getRepositories( clusterService, bigArrays, recoverySettings, - repositoriesMetrics + repositoriesMetrics, + snapshotMetrics ); }); } diff --git a/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureRepositorySettingsTests.java b/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureRepositorySettingsTests.java index 5f56e321d8f52..4f6c757f49d75 100644 --- a/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureRepositorySettingsTests.java +++ b/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureRepositorySettingsTests.java @@ -19,6 +19,7 @@ import org.elasticsearch.env.Environment; import 
org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.RepositoriesMetrics; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreTestUtil; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -46,7 +47,8 @@ private AzureRepository azureRepository(Settings settings) { BlobStoreTestUtil.mockClusterService(), MockBigArrays.NON_RECYCLING_INSTANCE, new RecoverySettings(settings, new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)), - RepositoriesMetrics.NOOP + RepositoriesMetrics.NOOP, + SnapshotMetrics.NOOP ); assertThat(azureRepository.getProjectId(), equalTo(projectId)); assertThat(azureRepository.getBlobStore(), is(nullValue())); diff --git a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java index 0ee1f8073f6cb..2906bc63ffe05 100644 --- a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java +++ b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java @@ -276,7 +276,7 @@ public Map getRepositories( ) { return Collections.singletonMap( GoogleCloudStorageRepository.TYPE, - (projectId, metadata) -> new GoogleCloudStorageRepository( + (projectId, metadata, snapshotMetrics) -> new GoogleCloudStorageRepository( projectId, metadata, registry, @@ -284,7 +284,8 @@ public Map getRepositories( clusterService, bigArrays, recoverySettings, - new GcsRepositoryStatsCollector() + new GcsRepositoryStatsCollector(), + snapshotMetrics ) { @Override protected GoogleCloudStorageBlobStore createBlobStore() { diff --git 
a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java index 97b781255bbb6..8f606535db716 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java @@ -57,7 +57,7 @@ public Map getRepositories( ) { return Collections.singletonMap( GoogleCloudStorageRepository.TYPE, - (projectId, metadata) -> new GoogleCloudStorageRepository( + (projectId, metadata, snapshotMetrics) -> new GoogleCloudStorageRepository( projectId, metadata, namedXContentRegistry, @@ -65,7 +65,8 @@ public Map getRepositories( clusterService, bigArrays, recoverySettings, - new GcsRepositoryStatsCollector(clusterService.threadPool(), metadata, repositoriesMetrics) + new GcsRepositoryStatsCollector(clusterService.threadPool(), metadata, repositoriesMetrics), + snapshotMetrics ) ); } diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRepository.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRepository.java index 91973186bbe39..b47aa830d2484 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRepository.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRepository.java @@ -24,6 +24,7 @@ import org.elasticsearch.core.TimeValue; import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.RepositoryException; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.MeteredBlobStoreRepository; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -96,7 +97,8 @@ class GoogleCloudStorageRepository extends 
MeteredBlobStoreRepository { final ClusterService clusterService, final BigArrays bigArrays, final RecoverySettings recoverySettings, - final GcsRepositoryStatsCollector statsCollector + final GcsRepositoryStatsCollector statsCollector, + final SnapshotMetrics snapshotMetrics ) { super( projectId, @@ -106,7 +108,8 @@ class GoogleCloudStorageRepository extends MeteredBlobStoreRepository { bigArrays, recoverySettings, buildBasePath(metadata), - buildLocation(metadata) + buildLocation(metadata), + snapshotMetrics ); this.storageService = storageService; this.chunkSize = getSetting(CHUNK_SIZE, metadata); diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePluginTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePluginTests.java index 96750968d898f..7b9ef189b7459 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePluginTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePluginTests.java @@ -15,6 +15,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.MockBigArrays; import org.elasticsearch.indices.recovery.RecoverySettings; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreTestUtil; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -64,7 +65,8 @@ public void testRepositoryProjectId() { BlobStoreTestUtil.mockClusterService(), MockBigArrays.NON_RECYCLING_INSTANCE, new RecoverySettings(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)), - mock(GcsRepositoryStatsCollector.class) + mock(GcsRepositoryStatsCollector.class), + SnapshotMetrics.NOOP ); assertThat(repository.getProjectId(), equalTo(projectId)); } diff --git 
a/modules/repository-s3/qa/third-party/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryThirdPartyTests.java b/modules/repository-s3/qa/third-party/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryThirdPartyTests.java index 9dbabe16538b4..f6b37b3f34c89 100644 --- a/modules/repository-s3/qa/third-party/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryThirdPartyTests.java +++ b/modules/repository-s3/qa/third-party/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryThirdPartyTests.java @@ -34,6 +34,7 @@ import org.elasticsearch.plugins.PluginsService; import org.elasticsearch.repositories.AbstractThirdPartyRepositoryTestCase; import org.elasticsearch.repositories.RepositoriesService; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.fixtures.minio.MinioTestContainer; @@ -155,7 +156,8 @@ public long absoluteTimeInMillis() { ClusterServiceUtils.createClusterService(threadpool), BigArrays.NON_RECYCLING_INSTANCE, new RecoverySettings(node().settings(), node().injector().getInstance(ClusterService.class).getClusterSettings()), - S3RepositoriesMetrics.NOOP + S3RepositoriesMetrics.NOOP, + SnapshotMetrics.NOOP ) ) { repository.start(); diff --git a/modules/repository-s3/src/internalClusterTest/java/org/elasticsearch/repositories/s3/S3BlobStoreRepositoryTests.java b/modules/repository-s3/src/internalClusterTest/java/org/elasticsearch/repositories/s3/S3BlobStoreRepositoryTests.java index 2c363177f92ec..e431c7e25d250 100644 --- a/modules/repository-s3/src/internalClusterTest/java/org/elasticsearch/repositories/s3/S3BlobStoreRepositoryTests.java +++ b/modules/repository-s3/src/internalClusterTest/java/org/elasticsearch/repositories/s3/S3BlobStoreRepositoryTests.java @@ -52,6 +52,7 @@ import org.elasticsearch.repositories.RepositoryData; import org.elasticsearch.repositories.RepositoryMissingException; import 
org.elasticsearch.repositories.RepositoryStats; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.repositories.blobstore.BlobStoreTestUtil; import org.elasticsearch.repositories.blobstore.ESMockAPIBasedRepositoryIntegTestCase; @@ -591,7 +592,8 @@ protected S3Repository createRepository( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - S3RepositoriesMetrics s3RepositoriesMetrics + S3RepositoriesMetrics s3RepositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return new S3Repository( projectId, @@ -601,7 +603,8 @@ protected S3Repository createRepository( clusterService, bigArrays, recoverySettings, - s3RepositoriesMetrics + s3RepositoriesMetrics, + snapshotMetrics ) { @Override diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java index 92b0dcc2f1cdc..4670a90ff88cd 100644 --- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java @@ -39,6 +39,7 @@ import org.elasticsearch.repositories.FinalizeSnapshotContext; import org.elasticsearch.repositories.RepositoryData; import org.elasticsearch.repositories.RepositoryException; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.MeteredBlobStoreRepository; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotsService; @@ -283,7 +284,8 @@ class S3Repository extends MeteredBlobStoreRepository { final ClusterService clusterService, final BigArrays bigArrays, final RecoverySettings recoverySettings, - final S3RepositoriesMetrics s3RepositoriesMetrics + final S3RepositoriesMetrics s3RepositoriesMetrics, + final SnapshotMetrics 
snapshotMetrics ) { super( projectId, @@ -293,7 +295,8 @@ class S3Repository extends MeteredBlobStoreRepository { bigArrays, recoverySettings, buildBasePath(metadata), - buildLocation(metadata) + buildLocation(metadata), + snapshotMetrics ); this.service = service; this.s3RepositoriesMetrics = s3RepositoriesMetrics; diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RepositoryPlugin.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RepositoryPlugin.java index ab836040efa9a..9b63f193d762e 100644 --- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RepositoryPlugin.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RepositoryPlugin.java @@ -29,6 +29,7 @@ import org.elasticsearch.plugins.RepositoryPlugin; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.watcher.ResourceWatcherService; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -65,7 +66,8 @@ protected S3Repository createRepository( final ClusterService clusterService, final BigArrays bigArrays, final RecoverySettings recoverySettings, - final S3RepositoriesMetrics s3RepositoriesMetrics + final S3RepositoriesMetrics s3RepositoriesMetrics, + final SnapshotMetrics snapshotMetrics ) { return new S3Repository( projectId, @@ -75,7 +77,8 @@ protected S3Repository createRepository( clusterService, bigArrays, recoverySettings, - s3RepositoriesMetrics + s3RepositoriesMetrics, + snapshotMetrics ); } @@ -118,14 +121,15 @@ public Map getRepositories( final S3RepositoriesMetrics s3RepositoriesMetrics = new S3RepositoriesMetrics(repositoriesMetrics); return Collections.singletonMap( S3Repository.TYPE, - (projectId, metadata) -> createRepository( + (projectId, metadata, snapshotMetrics) -> createRepository( projectId, metadata, registry, clusterService, 
bigArrays, recoverySettings, - s3RepositoriesMetrics + s3RepositoriesMetrics, + snapshotMetrics ) ); } diff --git a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java index d41631a79739f..b238e42f76242 100644 --- a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java +++ b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java @@ -28,6 +28,7 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.RepositoryException; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreTestUtil; import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.ESTestCase; @@ -178,7 +179,8 @@ private S3Repository createS3Repo(RepositoryMetadata metadata) { BlobStoreTestUtil.mockClusterService(), MockBigArrays.NON_RECYCLING_INSTANCE, new RecoverySettings(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)), - S3RepositoriesMetrics.NOOP + S3RepositoriesMetrics.NOOP, + SnapshotMetrics.NOOP ); assertThat(s3Repository.getProjectId(), equalTo(ProjectId.DEFAULT)); return s3Repository; diff --git a/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java b/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java index 821b761aa663d..4a2dc21d6e9f3 100644 --- a/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java +++ b/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java @@ -53,7 +53,7 @@ public Map getRepositories( ) { return Collections.singletonMap( URLRepository.TYPE, - (projectId, metadata) 
-> new URLRepository( + (projectId, metadata, snapshotMetrics) -> new URLRepository( projectId, metadata, env, diff --git a/modules/repository-url/src/main/java/org/elasticsearch/repositories/url/URLRepository.java b/modules/repository-url/src/main/java/org/elasticsearch/repositories/url/URLRepository.java index b2ba97cdd4a1c..3a6f2268f2878 100644 --- a/modules/repository-url/src/main/java/org/elasticsearch/repositories/url/URLRepository.java +++ b/modules/repository-url/src/main/java/org/elasticsearch/repositories/url/URLRepository.java @@ -28,6 +28,7 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.RepositoryException; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -102,7 +103,16 @@ public URLRepository( RecoverySettings recoverySettings, URLHttpClient.Factory httpClientFactory ) { - super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, BlobPath.EMPTY); + super( + projectId, + metadata, + namedXContentRegistry, + clusterService, + bigArrays, + recoverySettings, + BlobPath.EMPTY, + SnapshotMetrics.NOOP + ); if (URL_SETTING.exists(metadata.settings()) == false && REPOSITORIES_URL_SETTING.exists(environment.settings()) == false) { throw new RepositoryException(metadata.name(), "missing url"); diff --git a/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java b/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java index 9d80a270c7242..34c7dbd611418 100644 --- a/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java +++ b/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java @@ -71,7 +71,7 @@ public Map getRepositories( ) { return Collections.singletonMap( 
"hdfs", - (projectId, metadata) -> new HdfsRepository( + (projectId, metadata, snapshotMetrics) -> new HdfsRepository( projectId, metadata, env, diff --git a/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsRepository.java b/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsRepository.java index de01da575b8ba..19c7a2366b2b5 100644 --- a/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsRepository.java +++ b/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsRepository.java @@ -31,6 +31,7 @@ import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; import org.elasticsearch.repositories.RepositoryException; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -65,7 +66,16 @@ public HdfsRepository( BigArrays bigArrays, RecoverySettings recoverySettings ) { - super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, BlobPath.EMPTY); + super( + projectId, + metadata, + namedXContentRegistry, + clusterService, + bigArrays, + recoverySettings, + BlobPath.EMPTY, + SnapshotMetrics.NOOP + ); this.environment = environment; this.chunkSize = metadata.settings().getAsBytesSize("chunk_size", null); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/plan/ShardSnapshotsServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/plan/ShardSnapshotsServiceIT.java index 06e4d06fcee0e..a3e8422068f2f 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/plan/ShardSnapshotsServiceIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/plan/ShardSnapshotsServiceIT.java @@ -80,7 +80,7 @@ public Map getRepositories( ) { return Collections.singletonMap( TYPE, - 
(projectId, metadata) -> new FailingRepo( + (projectId, metadata, snapshotMetrics) -> new FailingRepo( projectId, metadata, env, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java index 62967fc2d035f..bfcab499f1043 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java @@ -78,7 +78,7 @@ public Map getRepositories( ) { return Collections.singletonMap( TYPE, - (projectId, metadata) -> new UnstableRepository( + (projectId, metadata, snapshotMetrics) -> new UnstableRepository( projectId, metadata, env, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryOperationPurposeIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryOperationPurposeIT.java index fb119caa25cb6..1980c3c78eeb1 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryOperationPurposeIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryOperationPurposeIT.java @@ -101,7 +101,7 @@ public Map getRepositories( ) { return Map.of( ASSERTING_REPO_TYPE, - (projectId, metadata) -> new AssertingRepository( + (projectId, metadata, snapshotMetrics) -> new AssertingRepository( projectId, metadata, env, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java index 89a8af49efdef..56a40383006a4 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java +++ 
b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java @@ -218,7 +218,7 @@ public Map getRepositories( ) { return Collections.singletonMap( TYPE, - (projectId, metadata) -> new CountingMockRepository( + (projectId, metadata, snapshotMetrics) -> new CountingMockRepository( projectId, metadata, env, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotsServiceDoubleFinalizationIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotsServiceDoubleFinalizationIT.java index 7c00a93fa495b..a0c14b2df0dc2 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotsServiceDoubleFinalizationIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotsServiceDoubleFinalizationIT.java @@ -213,7 +213,7 @@ public Map getRepositories( ) { return Map.of( REPO_TYPE, - (projectId, metadata) -> new TestRepository( + (projectId, metadata, snapshotMetrics) -> new TestRepository( projectId, metadata, env, diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java index 22adf929a3a08..c51e796d708d8 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java @@ -54,7 +54,7 @@ public RepositoriesModule( Map factories = new HashMap<>(); factories.put( FsRepository.TYPE, - (projectId, metadata) -> new FsRepository( + (projectId, metadata, snapshotMetrics) -> new FsRepository( projectId, metadata, env, @@ -135,7 +135,8 @@ public RepositoriesModule( internalRepositoryTypes, threadPool, client, - preRestoreChecks + preRestoreChecks, + telemetryProvider.getMeterRegistry() ); } diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java 
b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java index 6248e74bee109..22d4a6b6d8e4d 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java @@ -58,6 +58,8 @@ import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.repositories.blobstore.MeteredBlobStoreRepository; import org.elasticsearch.snapshots.Snapshot; +import org.elasticsearch.telemetry.metric.LongWithAttributes; +import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.threadpool.ThreadPool; import java.io.IOException; @@ -123,8 +125,8 @@ public class RepositoriesService extends AbstractLifecycleComponent implements C private final RepositoriesStatsArchive repositoriesStatsArchive; private final List> preRestoreChecks; + private final SnapshotMetrics snapshotMetrics; - @SuppressWarnings("this-escape") public RepositoriesService( Settings settings, ClusterService clusterService, @@ -133,6 +135,20 @@ public RepositoriesService( ThreadPool threadPool, NodeClient client, List> preRestoreChecks + ) { + this(settings, clusterService, typesRegistry, internalTypesRegistry, threadPool, client, preRestoreChecks, MeterRegistry.NOOP); + } + + @SuppressWarnings("this-escape") + public RepositoriesService( + Settings settings, + ClusterService clusterService, + Map typesRegistry, + Map internalTypesRegistry, + ThreadPool threadPool, + NodeClient client, + List> preRestoreChecks, + MeterRegistry meterRegistry ) { this.typesRegistry = typesRegistry; this.internalTypesRegistry = internalTypesRegistry; @@ -152,6 +168,7 @@ public RepositoriesService( threadPool.relativeTimeInMillisSupplier() ); this.preRestoreChecks = preRestoreChecks; + this.snapshotMetrics = new SnapshotMetrics(meterRegistry, this::getSnapshotsInProgress); } /** @@ -750,7 +767,8 @@ private void applyProjectStateForAddedOrExistingProject(long version, 
ProjectSta projectId, repositoryMetadata, typesRegistry, - RepositoriesService::createUnknownTypeRepository + RepositoriesService::createUnknownTypeRepository, + snapshotMetrics ); } catch (RepositoryException ex) { // TODO: this catch is bogus, it means the old repo is already closed, @@ -765,7 +783,8 @@ private void applyProjectStateForAddedOrExistingProject(long version, ProjectSta projectId, repositoryMetadata, typesRegistry, - RepositoriesService::createUnknownTypeRepository + RepositoriesService::createUnknownTypeRepository, + snapshotMetrics ); } catch (RepositoryException ex) { logger.warn(() -> "failed to create repository " + projectRepoString(projectId, repositoryMetadata.name()), ex); @@ -941,7 +960,8 @@ public void registerInternalRepository(ProjectId projectId, String name, String projectId, metadata, internalTypesRegistry, - RepositoriesService::throwRepositoryTypeDoesNotExists + RepositoriesService::throwRepositoryTypeDoesNotExists, + snapshotMetrics ); final var newRepos = new HashMap<>(existingRepos); newRepos.put(name, repo); @@ -1021,7 +1041,8 @@ private static Repository createRepository( @Nullable ProjectId projectId, RepositoryMetadata repositoryMetadata, Map factories, - BiFunction defaultFactory + BiFunction defaultFactory, + SnapshotMetrics snapshotMetrics ) { logger.debug("creating repository [{}][{}]", repositoryMetadata.type(), repositoryMetadata.name()); Repository.Factory factory = factories.get(repositoryMetadata.type()); @@ -1030,7 +1051,7 @@ private static Repository createRepository( } Repository repository = null; try { - repository = factory.create(projectId, repositoryMetadata, factories::get); + repository = factory.create(projectId, repositoryMetadata, factories::get, snapshotMetrics); repository.start(); return repository; } catch (Exception e) { @@ -1061,7 +1082,8 @@ public Repository createRepository(ProjectId projectId, RepositoryMetadata repos Objects.requireNonNull(projectId), repositoryMetadata, typesRegistry, - 
RepositoriesService::throwRepositoryTypeDoesNotExists + RepositoriesService::throwRepositoryTypeDoesNotExists, + snapshotMetrics ); } @@ -1072,7 +1094,22 @@ public Repository createRepository(ProjectId projectId, RepositoryMetadata repos public Repository createNonProjectRepository(RepositoryMetadata repositoryMetadata) { assert DiscoveryNode.isStateless(clusterService.getSettings()) : "outside stateless only project level repositories are allowed: " + repositoryMetadata; - return createRepository(null, repositoryMetadata, typesRegistry, RepositoriesService::throwRepositoryTypeDoesNotExists); + return createRepository( + null, + repositoryMetadata, + typesRegistry, + RepositoriesService::throwRepositoryTypeDoesNotExists, + snapshotMetrics + ); + } + + private Collection getSnapshotsInProgress() { + return repositories.values() + .stream() + .flatMap(repositories -> repositories.values().stream()) + .map(Repository::getShardSnapshotsInProgress) + .filter(Objects::nonNull) + .toList(); } private static Repository throwRepositoryTypeDoesNotExists(ProjectId projectId, RepositoryMetadata repositoryMetadata) { diff --git a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index 74c5f3e0e9dc0..2ae246934d619 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -26,6 +26,7 @@ import org.elasticsearch.indices.recovery.RecoveryState; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotInfo; +import org.elasticsearch.telemetry.metric.LongWithAttributes; import org.elasticsearch.threadpool.ThreadPool; import java.io.IOException; @@ -62,7 +63,7 @@ interface Factory { * @param projectId the project-id for the repository or {@code null} if the repository is at the cluster level. 
* @param metadata metadata for the repository including name and settings */ - Repository create(@Nullable ProjectId projectId, RepositoryMetadata metadata) throws Exception; + Repository create(@Nullable ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) throws Exception; /** * Constructs a repository. @@ -73,9 +74,10 @@ interface Factory { default Repository create( @Nullable ProjectId projectId, RepositoryMetadata metadata, - Function typeLookup + Function typeLookup, + SnapshotMetrics snapshotMetrics ) throws Exception { - return create(projectId, metadata); + return create(projectId, metadata, snapshotMetrics); } } @@ -345,4 +347,14 @@ default Set getUsageFeatures() { static boolean assertSnapshotMetaThread() { return ThreadPool.assertCurrentThreadPool(ThreadPool.Names.SNAPSHOT_META); } + + /** + * Get the current count of snapshots in progress + * + * @return The current number of shard snapshots in progress + */ + @Nullable + default LongWithAttributes getShardSnapshotsInProgress() { + return null; + } } diff --git a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java new file mode 100644 index 0000000000000..dd9e7f704312f --- /dev/null +++ b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java @@ -0,0 +1,69 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.repositories; + +import org.elasticsearch.cluster.metadata.RepositoryMetadata; +import org.elasticsearch.telemetry.metric.LongCounter; +import org.elasticsearch.telemetry.metric.LongGauge; +import org.elasticsearch.telemetry.metric.LongWithAttributes; +import org.elasticsearch.telemetry.metric.MeterRegistry; + +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.function.Supplier; + +public record SnapshotMetrics( + LongCounter snapshotsStartedCounter, + LongCounter snapshotsCompletedCounter, + LongGauge snapshotsInProgressGauge, + LongCounter snapshotBlobsUploadedCounter, + LongCounter snapshotBytesUploadedCounter, + LongCounter snapshotUploadDurationCounter, + LongCounter snapshotUploadReadDurationCounter, + LongCounter snapshotCreateThrottleDurationCounter, + LongCounter snapshotRestoreThrottleDurationCounter +) { + + public static final SnapshotMetrics NOOP = new SnapshotMetrics(MeterRegistry.NOOP, List::of); + + public static final String SNAPSHOTS_STARTED = "es.repositories.snapshots.started.total"; + public static final String SNAPSHOTS_COMPLETED = "es.repositories.snapshots.completed.total"; + public static final String SNAPSHOTS_IN_PROGRESS = "es.repositories.snapshots.current"; + public static final String SNAPSHOT_BLOBS_UPLOADED = "es.repositories.snapshots.blobs.uploaded.total"; + public static final String SNAPSHOT_BYTES_UPLOADED = "es.repositories.snapshots.upload.bytes.total"; + public static final String SNAPSHOT_UPLOAD_DURATION = "es.repositories.snapshots.upload.upload_time.total"; + public static final String SNAPSHOT_UPLOAD_READ_DURATION = "es.repositories.snapshots.upload.read_time.total"; + public static final String SNAPSHOT_CREATE_THROTTLE_DURATION = "es.repositories.snapshots.create_throttling.time.total"; + public static final String SNAPSHOT_RESTORE_THROTTLE_DURATION = "es.repositories.snapshots.restore_throttling.time.total"; + + public 
SnapshotMetrics(MeterRegistry meterRegistry, Supplier> shardSnapshotsInProgressObserver) { + this( + meterRegistry.registerLongCounter(SNAPSHOTS_STARTED, "shard snapshots started", "unit"), + meterRegistry.registerLongCounter(SNAPSHOTS_COMPLETED, "shard snapshots completed", "unit"), + meterRegistry.registerLongsGauge( + SNAPSHOTS_IN_PROGRESS, + "shard snapshots in progress", + "unit", + shardSnapshotsInProgressObserver + ), + meterRegistry.registerLongCounter(SNAPSHOT_BLOBS_UPLOADED, "snapshot blobs uploaded", "unit"), + meterRegistry.registerLongCounter(SNAPSHOT_BYTES_UPLOADED, "snapshot bytes uploaded", "bytes"), + meterRegistry.registerLongCounter(SNAPSHOT_UPLOAD_DURATION, "snapshot upload duration", "ns"), + meterRegistry.registerLongCounter(SNAPSHOT_UPLOAD_READ_DURATION, "time spent in read() calls when snapshotting", "ns"), + meterRegistry.registerLongCounter(SNAPSHOT_CREATE_THROTTLE_DURATION, "time throttled in snapshot create", "ns"), + meterRegistry.registerLongCounter(SNAPSHOT_RESTORE_THROTTLE_DURATION, "time throttled in snapshot restore", "ns") + ); + } + + public static Map createAttributesMap(RepositoryMetadata meta) { + return Map.of("repo_type", meta.type(), "repo_name", meta.name()); + } +} diff --git a/server/src/main/java/org/elasticsearch/repositories/SnapshotShardContext.java b/server/src/main/java/org/elasticsearch/repositories/SnapshotShardContext.java index 85692603b1e14..d0b2aca3774ec 100644 --- a/server/src/main/java/org/elasticsearch/repositories/SnapshotShardContext.java +++ b/server/src/main/java/org/elasticsearch/repositories/SnapshotShardContext.java @@ -12,6 +12,7 @@ import org.apache.lucene.index.IndexCommit; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.DelegatingActionListener; +import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.Releasables; @@ -67,7 +68,7 @@ public 
SnapshotShardContext( final long snapshotStartTime, ActionListener listener ) { - super(commitRef.closingBefore(listener)); + super(createListener(commitRef.closingBefore(listener))); this.store = store; this.mapperService = mapperService; this.snapshotId = snapshotId; @@ -79,6 +80,12 @@ public SnapshotShardContext( this.snapshotStartTime = snapshotStartTime; } + private static SubscribableListener createListener(ActionListener listener) { + final SubscribableListener objectSubscribableListener = new SubscribableListener<>(); + objectSubscribableListener.addListener(listener); + return objectSubscribableListener; + } + public Store store() { return store; } @@ -131,4 +138,8 @@ public Releasable withCommitRef() { throw new IndexShardSnapshotFailedException(store.shardId(), "Store got closed concurrently"); } } + + public void addListener(ActionListener listener) { + ((SubscribableListener) this.delegate).addListener(listener); + } } diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 996c9cda4deab..c2c463a73fda5 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -73,7 +73,6 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.store.InputStreamIndexInput; -import org.elasticsearch.common.metrics.CounterMetric; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; @@ -122,6 +121,7 @@ import org.elasticsearch.repositories.ShardGeneration; import org.elasticsearch.repositories.ShardGenerations; import org.elasticsearch.repositories.ShardSnapshotResult; +import 
org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.SnapshotShardContext; import org.elasticsearch.snapshots.AbortedSnapshotException; import org.elasticsearch.snapshots.PausedSnapshotException; @@ -131,6 +131,7 @@ import org.elasticsearch.snapshots.SnapshotMissingException; import org.elasticsearch.snapshots.SnapshotsService; import org.elasticsearch.tasks.TaskCancelledException; +import org.elasticsearch.telemetry.metric.LongWithAttributes; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.LeakTracker; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -356,14 +357,12 @@ public static String getRepositoryDataBlobName(long repositoryGeneration) { private final boolean cacheRepositoryData; + private final BlobStoreSnapshotMetrics blobStoreSnapshotMetrics; + private volatile RateLimiter snapshotRateLimiter; private volatile RateLimiter restoreRateLimiter; - private final CounterMetric snapshotRateLimitingTimeInNanos = new CounterMetric(); - - private final CounterMetric restoreRateLimitingTimeInNanos = new CounterMetric(); - public static final ChecksumBlobStoreFormat GLOBAL_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( "metadata", METADATA_NAME_FORMAT, @@ -494,7 +493,8 @@ protected BlobStoreRepository( final ClusterService clusterService, final BigArrays bigArrays, final RecoverySettings recoverySettings, - final BlobPath basePath + final BlobPath basePath, + final SnapshotMetrics snapshotMetrics ) { this.projectId = projectId; this.metadata = metadata; @@ -528,6 +528,7 @@ protected BlobStoreRepository( threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(), threadPool.executor(ThreadPool.Names.SNAPSHOT) ); + this.blobStoreSnapshotMetrics = new BlobStoreSnapshotMetrics(metadata, snapshotMetrics); } @Override @@ -2178,12 +2179,12 @@ RateLimiter getRestoreRateLimiter() { @Override public long getSnapshotThrottleTimeInNanos() { - return snapshotRateLimitingTimeInNanos.count(); + return 
blobStoreSnapshotMetrics.snapshotRateLimitingTimeInNanos(); } @Override public long getRestoreThrottleTimeInNanos() { - return restoreRateLimitingTimeInNanos.count(); + return blobStoreSnapshotMetrics.restoreRateLimitingTimeInNanos(); } private void assertSnapshotOrStatelessPermittedThreadPool() { @@ -3213,6 +3214,8 @@ public void snapshotShard(SnapshotShardContext context) { } private void doSnapshotShard(SnapshotShardContext context) { + blobStoreSnapshotMetrics.shardSnapshotStarted(); + context.addListener(ActionListener.running(blobStoreSnapshotMetrics::shardSnapshotCompleted)); if (isReadOnly()) { context.onFailure(new RepositoryException(metadata.name(), "cannot snapshot shard on a readonly repository")); return; @@ -3485,6 +3488,7 @@ private void doSnapshotShard(SnapshotShardContext context) { ); snapshotStatus.updateStatusDescription("all files uploaded: done"); snapshotStatus.moveToDone(threadPool.absoluteTimeInMillis(), shardSnapshotResult); + /* completion is recorded exactly once, for success and failure alike, by the listener registered at the top of doSnapshotShard */ context.onResponse(shardSnapshotResult); }, e -> { try { @@ -3755,7 +3759,7 @@ private static InputStream maybeRateLimit( * recorded in the value returned by {@link BlobStoreRepository#getRestoreThrottleTimeInNanos}. */ public InputStream maybeRateLimitRestores(InputStream stream) { - return maybeRateLimitRestores(stream, restoreRateLimitingTimeInNanos::inc); + return maybeRateLimitRestores(stream, blobStoreSnapshotMetrics::incrementRestoreRateLimitingTimeInNanos); } /** @@ -3778,7 +3782,7 @@ public InputStream maybeRateLimitRestores(InputStream stream, RateLimitingInputS * `indices.recovery.max_bytes_per_sec` speed. 
*/ public InputStream maybeRateLimitSnapshots(InputStream stream) { - return maybeRateLimitSnapshots(stream, snapshotRateLimitingTimeInNanos::inc); + return maybeRateLimitSnapshots(stream, blobStoreSnapshotMetrics::incrementSnapshotRateLimitingTimeInNanos); } /** @@ -4110,13 +4114,19 @@ protected void snapshotFile(SnapshotShardContext context, FileInfo fileInfo) thr @Override public int read() throws IOException { checkAborted(); - return super.read(); + final long beforeReadNanos = threadPool.relativeTimeInNanos(); + int value = super.read(); + blobStoreSnapshotMetrics.incrementUploadReadTime(threadPool().relativeTimeInNanos() - beforeReadNanos); + return value; } @Override public int read(byte[] b, int off, int len) throws IOException { checkAborted(); - return super.read(b, off, len); + final long beforeReadNanos = threadPool.relativeTimeInNanos(); + int amountRead = super.read(b, off, len); + blobStoreSnapshotMetrics.incrementUploadReadTime(threadPool().relativeTimeInNanos() - beforeReadNanos); + return amountRead; } private void checkAborted() { @@ -4125,18 +4135,21 @@ private void checkAborted() { }; final String partName = fileInfo.partName(i); logger.trace("[{}] Writing [{}] to [{}]", metadata.name(), partName, shardContainer.path()); - final long startMS = threadPool.relativeTimeInMillis(); + final long startNanos = threadPool.relativeTimeInNanos(); shardContainer.writeBlob(OperationPurpose.SNAPSHOT_DATA, partName, inputStream, partBytes, false); + final long uploadTimeInNanos = threadPool.relativeTimeInNanos() - startNanos; + blobStoreSnapshotMetrics.incrementCountersForPartUpload(partBytes, uploadTimeInNanos); logger.trace( "[{}] Writing [{}] of size [{}b] to [{}] took [{}ms]", metadata.name(), partName, partBytes, shardContainer.path(), - threadPool.relativeTimeInMillis() - startMS + TimeUnit.NANOSECONDS.toMillis(uploadTimeInNanos) ); } Store.verify(indexInput); + blobStoreSnapshotMetrics.incrementNumberOfBlobsUploaded(); 
snapshotStatus.addProcessedFile(fileInfo.length()); } catch (Exception t) { failStoreIfCorrupted(store, t); @@ -4209,4 +4222,9 @@ public final Set getUsageFeatures() { protected Set getExtraUsageFeatures() { return Set.of(); } + + @Override + public LongWithAttributes getShardSnapshotsInProgress() { + return blobStoreSnapshotMetrics.getShardSnapshotsInProgress(); + } } diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java new file mode 100644 index 0000000000000..560785a561d43 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java @@ -0,0 +1,88 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.repositories.blobstore; + +import org.elasticsearch.cluster.metadata.RepositoryMetadata; +import org.elasticsearch.common.metrics.CounterMetric; +import org.elasticsearch.repositories.SnapshotMetrics; +import org.elasticsearch.telemetry.metric.LongWithAttributes; + +import java.util.Map; + +public class BlobStoreSnapshotMetrics { + + private final SnapshotMetrics snapshotMetrics; + private final CounterMetric shardSnapshotsInProgress = new CounterMetric(); + private final CounterMetric snapshotRateLimitingTimeInNanos = new CounterMetric(); + private final CounterMetric restoreRateLimitingTimeInNanos = new CounterMetric(); + private final CounterMetric numberOfBlobsUploaded = new CounterMetric(); + private final CounterMetric numberOfBytesUploaded = new CounterMetric(); + private final CounterMetric uploadTimeInNanos = new CounterMetric(); + private final CounterMetric uploadReadTimeInNanos = new CounterMetric(); + private final CounterMetric numberOfShardSnapshotsStarted = new CounterMetric(); + private final CounterMetric numberOfShardSnapshotsCompleted = new CounterMetric(); + private final Map metricAttributes; + + public BlobStoreSnapshotMetrics(RepositoryMetadata repositoryMetadata, SnapshotMetrics snapshotMetrics) { + this.snapshotMetrics = snapshotMetrics; + metricAttributes = SnapshotMetrics.createAttributesMap(repositoryMetadata); + } + + public void incrementSnapshotRateLimitingTimeInNanos(long throttleTimeNanos) { + snapshotMetrics.snapshotCreateThrottleDurationCounter().incrementBy(throttleTimeNanos); + snapshotRateLimitingTimeInNanos.inc(throttleTimeNanos); + } + + public long snapshotRateLimitingTimeInNanos() { + return snapshotRateLimitingTimeInNanos.count(); + } + + public void incrementRestoreRateLimitingTimeInNanos(long throttleTimeNanos) { + snapshotMetrics.snapshotRestoreThrottleDurationCounter().incrementBy(throttleTimeNanos); + restoreRateLimitingTimeInNanos.inc(throttleTimeNanos); + } + + public long 
restoreRateLimitingTimeInNanos() { + return restoreRateLimitingTimeInNanos.count(); + } + + public void incrementCountersForPartUpload(long partSizeInBytes, long partWriteTimeNanos) { + snapshotMetrics.snapshotBytesUploadedCounter().incrementBy(partSizeInBytes); + snapshotMetrics.snapshotUploadDurationCounter().incrementBy(partWriteTimeNanos); + numberOfBytesUploaded.inc(partSizeInBytes); + uploadTimeInNanos.inc(partWriteTimeNanos); + } + + public void incrementNumberOfBlobsUploaded() { + snapshotMetrics.snapshotBlobsUploadedCounter().increment(); + numberOfBlobsUploaded.inc(); + } + + public void shardSnapshotStarted() { + snapshotMetrics.snapshotsStartedCounter().increment(); + numberOfShardSnapshotsStarted.inc(); + shardSnapshotsInProgress.inc(); + } + + public void shardSnapshotCompleted() { + snapshotMetrics.snapshotsCompletedCounter().increment(); + numberOfShardSnapshotsCompleted.inc(); + shardSnapshotsInProgress.dec(); + } + + public void incrementUploadReadTime(long readTimeInNanos) { + snapshotMetrics.snapshotUploadReadDurationCounter().incrementBy(readTimeInNanos); + uploadReadTimeInNanos.inc(readTimeInNanos); + } + + public LongWithAttributes getShardSnapshotsInProgress() { + return new LongWithAttributes(shardSnapshotsInProgress.count(), metricAttributes); + } +} diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/MeteredBlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/MeteredBlobStoreRepository.java index aa7fae749c328..478a936b6c8b9 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/MeteredBlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/MeteredBlobStoreRepository.java @@ -18,6 +18,7 @@ import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.RepositoryInfo; import org.elasticsearch.repositories.RepositoryStatsSnapshot; +import org.elasticsearch.repositories.SnapshotMetrics; import 
org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -34,9 +35,10 @@ public MeteredBlobStoreRepository( BigArrays bigArrays, RecoverySettings recoverySettings, BlobPath basePath, - Map location + Map location, + SnapshotMetrics snapshotMetrics ) { - super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, basePath); + super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, basePath, snapshotMetrics); ThreadPool threadPool = clusterService.getClusterApplierService().threadPool(); this.repositoryInfo = new RepositoryInfo( UUIDs.randomBase64UUID(), diff --git a/server/src/main/java/org/elasticsearch/repositories/fs/FsRepository.java b/server/src/main/java/org/elasticsearch/repositories/fs/FsRepository.java index 97e2c0b2de44b..38b6a3a93d728 100644 --- a/server/src/main/java/org/elasticsearch/repositories/fs/FsRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/fs/FsRepository.java @@ -24,6 +24,7 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.RepositoryException; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -84,7 +85,16 @@ public FsRepository( BigArrays bigArrays, RecoverySettings recoverySettings ) { - super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, BlobPath.EMPTY); + super( + projectId, + metadata, + namedXContentRegistry, + clusterService, + bigArrays, + recoverySettings, + BlobPath.EMPTY, + SnapshotMetrics.NOOP + ); this.environment = environment; String location = REPOSITORIES_LOCATION_SETTING.get(metadata.settings()); if (location.isEmpty()) { diff --git 
a/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java index cee2c5fd8d41d..b6e141f644c76 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryException; import org.elasticsearch.repositories.RepositoryMissingException; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.reservedstate.TransformState; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; @@ -127,7 +128,7 @@ public void testRemoveRepo() { private RepositoriesService mockRepositoriesService() { var fsFactory = new Repository.Factory() { @Override - public Repository create(ProjectId projectId, RepositoryMetadata metadata) { + public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { var repo = mock(Repository.class); doAnswer(invocation -> metadata).when(repo).getMetadata(); return repo; diff --git a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java index 96d601f9091ff..834365aa1c0b3 100644 --- a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java @@ -138,15 +138,15 @@ public void setUp() throws Exception { Map typesRegistry = Map.of( TestRepository.TYPE, - (projectId, metadata1) -> new TestRepository(projectId, metadata1), + (projectId, metadata1, 
snapshotMetrics) -> new TestRepository(projectId, metadata1), UnstableRepository.TYPE, - (projectId, metadata2) -> new UnstableRepository(projectId, metadata2), + (projectId, metadata2, snapshotMetrics) -> new UnstableRepository(projectId, metadata2), VerificationFailRepository.TYPE, - (projectId, metadata3) -> new VerificationFailRepository(projectId, metadata3), + (projectId, metadata3, snapshotMetrics) -> new VerificationFailRepository(projectId, metadata3), MeteredRepositoryTypeA.TYPE, - (projectId, metadata) -> new MeteredRepositoryTypeA(projectId, metadata, clusterService), + (projectId, metadata, snapshotMetrics) -> new MeteredRepositoryTypeA(projectId, metadata, clusterService), MeteredRepositoryTypeB.TYPE, - (projectId, metadata) -> new MeteredRepositoryTypeB(projectId, metadata, clusterService) + (projectId, metadata, snapshotMetrics) -> new MeteredRepositoryTypeB(projectId, metadata, clusterService) ); repositoriesService = new RepositoriesService( Settings.EMPTY, @@ -832,7 +832,8 @@ private MeteredRepositoryTypeA(ProjectId projectId, RepositoryMetadata metadata, MockBigArrays.NON_RECYCLING_INSTANCE, mock(RecoverySettings.class), BlobPath.EMPTY, - Map.of("bucket", "bucket-a") + Map.of("bucket", "bucket-a"), + SnapshotMetrics.NOOP ); } @@ -860,7 +861,8 @@ private MeteredRepositoryTypeB(ProjectId projectId, RepositoryMetadata metadata, MockBigArrays.NON_RECYCLING_INSTANCE, mock(RecoverySettings.class), BlobPath.EMPTY, - Map.of("bucket", "bucket-b") + Map.of("bucket", "bucket-b"), + SnapshotMetrics.NOOP ); } diff --git a/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryDeleteThrottlingTests.java b/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryDeleteThrottlingTests.java index de1979cfcf5d2..b4ece269399ec 100644 --- a/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryDeleteThrottlingTests.java +++ 
b/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryDeleteThrottlingTests.java @@ -75,7 +75,7 @@ public Map getRepositories( ) { return Collections.singletonMap( TEST_REPO_TYPE, - (projectId, metadata) -> new FsRepository( + (projectId, metadata, snapshotMetrics) -> new FsRepository( projectId, metadata, env, diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java index c005691b212b2..169f44a2b2351 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java @@ -2397,7 +2397,7 @@ public RecyclerBytesStreamOutput newNetworkBytesStream() { clusterService, Collections.singletonMap( FsRepository.TYPE, - (projectId, metadata) -> new FsRepository( + (projectId, metadata, snapshotMetrics) -> new FsRepository( projectId, metadata, environment, diff --git a/test/external-modules/latency-simulating-directory/src/internalClusterTest/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepositoryTests.java b/test/external-modules/latency-simulating-directory/src/internalClusterTest/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepositoryTests.java index c553f0f10062a..f27fca9382b1e 100644 --- a/test/external-modules/latency-simulating-directory/src/internalClusterTest/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepositoryTests.java +++ b/test/external-modules/latency-simulating-directory/src/internalClusterTest/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepositoryTests.java @@ -65,7 +65,7 @@ public Map getRepositories( ) { return Map.of( REPO_TYPE, - (projectId, metadata) -> new LatencySimulatingBlobStoreRepository( + (projectId, metadata, snapshotMetrics) -> new LatencySimulatingBlobStoreRepository( 
projectId, metadata, env, diff --git a/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingRepositoryPlugin.java b/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingRepositoryPlugin.java index b60c965a66537..ac04fb4dd4f07 100644 --- a/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingRepositoryPlugin.java +++ b/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingRepositoryPlugin.java @@ -41,7 +41,7 @@ public Map getRepositories( ) { return Map.of( TYPE, - (projectId, metadata) -> new LatencySimulatingBlobStoreRepository( + (projectId, metadata, snapshotMetrics) -> new LatencySimulatingBlobStoreRepository( projectId, metadata, env, diff --git a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java index 252091bb45d82..26062bcb0c201 100644 --- a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java +++ b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java @@ -90,7 +90,7 @@ public Map getRepositories( ) { return Collections.singletonMap( "mock", - (projectId, metadata) -> new MockRepository( + (projectId, metadata, snapshotMetrics) -> new MockRepository( projectId, metadata, env, diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/Ccr.java b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/Ccr.java index bfc85f3efb0fe..53a5c3cc2059d 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/Ccr.java +++ b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/Ccr.java @@ -375,7 +375,7 @@ public Map getInternalRepositories( ClusterService 
clusterService, RecoverySettings recoverySettings ) { - Repository.Factory repositoryFactory = (projectId, metadata) -> new CcrRepository( + Repository.Factory repositoryFactory = (projectId, metadata, snapshotMetrics) -> new CcrRepository( projectId, metadata, client, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotRepository.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotRepository.java index e6f3c3fa54277..21e1a0c010b4f 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotRepository.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotRepository.java @@ -43,6 +43,7 @@ import org.elasticsearch.repositories.IndexId; import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.SnapshotIndexCommit; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.SnapshotShardContext; import java.io.Closeable; @@ -257,20 +258,29 @@ public static Repository.Factory newRepositoryFactory() { return new Repository.Factory() { @Override - public Repository create(ProjectId projectId, RepositoryMetadata metadata) { + public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { throw new UnsupportedOperationException(); } @Override - public Repository create(ProjectId projectId, RepositoryMetadata metadata, Function typeLookup) - throws Exception { + public Repository create( + ProjectId projectId, + RepositoryMetadata metadata, + Function typeLookup, + SnapshotMetrics snapshotMetrics + ) throws Exception { String delegateType = DELEGATE_TYPE.get(metadata.settings()); if (Strings.hasLength(delegateType) == false) { throw new IllegalArgumentException(DELEGATE_TYPE.getKey() + " must be set"); } Repository.Factory factory = typeLookup.apply(delegateType); return new 
SourceOnlySnapshotRepository( - factory.create(projectId, new RepositoryMetadata(metadata.name(), delegateType, metadata.settings()), typeLookup) + factory.create( + projectId, + new RepositoryMetadata(metadata.name(), delegateType, metadata.settings()), + typeLookup, + snapshotMetrics + ) ); } }; diff --git a/x-pack/plugin/old-lucene-versions/src/internalClusterTest/java/org/elasticsearch/xpack/lucene/bwc/AbstractArchiveTestCase.java b/x-pack/plugin/old-lucene-versions/src/internalClusterTest/java/org/elasticsearch/xpack/lucene/bwc/AbstractArchiveTestCase.java index 614036d3792ca..8c7ae5d914d29 100644 --- a/x-pack/plugin/old-lucene-versions/src/internalClusterTest/java/org/elasticsearch/xpack/lucene/bwc/AbstractArchiveTestCase.java +++ b/x-pack/plugin/old-lucene-versions/src/internalClusterTest/java/org/elasticsearch/xpack/lucene/bwc/AbstractArchiveTestCase.java @@ -67,7 +67,7 @@ public Map getRepositories( ) { return Map.of( FAKE_VERSIONS_TYPE, - (projectId, metadata) -> new FakeVersionsRepo( + (projectId, metadata, snapshotMetrics) -> new FakeVersionsRepo( projectId, metadata, env, diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java index 518ff2354f498..6fbb4f98a8236 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java @@ -381,7 +381,7 @@ public Map getRepositories( ) { return Collections.singletonMap( TYPE, - (projectId, metadata) -> new CustomMockRepository( + (projectId, 
metadata, snapshotMetrics) -> new CustomMockRepository( projectId, metadata, env, diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPrewarmingIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPrewarmingIntegTests.java index dcf77d06e323a..c6254e3d9da7f 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPrewarmingIntegTests.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPrewarmingIntegTests.java @@ -454,7 +454,7 @@ public Map getRepositories( ) { return Collections.singletonMap( "tracking", - (projectId, metadata) -> new FsRepository( + (projectId, metadata, snapshotMetrics) -> new FsRepository( projectId, metadata, env, diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/recovery/SearchableSnapshotRecoveryStateIntegrationTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/recovery/SearchableSnapshotRecoveryStateIntegrationTests.java index e0674c0151f65..9d95548ee38c8 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/recovery/SearchableSnapshotRecoveryStateIntegrationTests.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/recovery/SearchableSnapshotRecoveryStateIntegrationTests.java @@ -248,7 +248,7 @@ public Map getRepositories( ) { return Collections.singletonMap( "test-fs", - (projectId, metadata) -> new FsRepository( + (projectId, metadata, snapshotMetrics) -> new FsRepository( projectId, 
metadata, env, diff --git a/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMHealthBlockedSnapshotIT.java b/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMHealthBlockedSnapshotIT.java index 219dd20f4e620..19afeb0a5aa12 100644 --- a/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMHealthBlockedSnapshotIT.java +++ b/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMHealthBlockedSnapshotIT.java @@ -120,7 +120,7 @@ public Map getRepositories( ) { return Map.of( TestDelayedRepo.TYPE, - (projectId, metadata) -> new TestDelayedRepo( + (projectId, metadata, snapshotMetrics) -> new TestDelayedRepo( projectId, metadata, env, diff --git a/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMStatDisruptionIT.java b/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMStatDisruptionIT.java index d3eff9eb8585e..c6e15b229813d 100644 --- a/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMStatDisruptionIT.java +++ b/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMStatDisruptionIT.java @@ -138,7 +138,7 @@ public Map getRepositories( ) { return Map.of( TestDelayedRepo.TYPE, - (projectId, metadata) -> new TestDelayedRepo( + (projectId, metadata, snapshotMetrics) -> new TestDelayedRepo( projectId, metadata, env, @@ -210,7 +210,7 @@ public Map getRepositories( ) { return Map.of( TestRestartBeforeListenersRepo.TYPE, - (projectId, metadata) -> new TestRestartBeforeListenersRepo( + (projectId, metadata, snapshotMetrics) -> new TestRestartBeforeListenersRepo( projectId, metadata, env, diff --git a/x-pack/plugin/snapshot-based-recoveries/src/internalClusterTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/SnapshotBasedIndexRecoveryIT.java 
b/x-pack/plugin/snapshot-based-recoveries/src/internalClusterTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/SnapshotBasedIndexRecoveryIT.java index 6f73efb2fa095..b3c1f237654a6 100644 --- a/x-pack/plugin/snapshot-based-recoveries/src/internalClusterTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/SnapshotBasedIndexRecoveryIT.java +++ b/x-pack/plugin/snapshot-based-recoveries/src/internalClusterTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/SnapshotBasedIndexRecoveryIT.java @@ -152,7 +152,7 @@ public Map getRepositories( ) { return Map.of( FAULTY_TYPE, - (projectId, metadata) -> new FaultyRepository( + (projectId, metadata, snapshotMetrics) -> new FaultyRepository( projectId, metadata, env, @@ -162,7 +162,7 @@ public Map getRepositories( recoverySettings ), INSTRUMENTED_TYPE, - (projectId, metadata) -> new InstrumentedRepo( + (projectId, metadata, snapshotMetrics) -> new InstrumentedRepo( projectId, metadata, env, @@ -172,7 +172,7 @@ public Map getRepositories( recoverySettings ), FILTER_TYPE, - (projectId, metadata) -> new FilterFsRepository( + (projectId, metadata, snapshotMetrics) -> new FilterFsRepository( projectId, metadata, env, diff --git a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java index 399a9eee0d752..2d7831d13b655 100644 --- a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java +++ b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java @@ -40,6 +40,7 @@ import org.elasticsearch.repositories.Repository; import 
org.elasticsearch.repositories.RepositoryMissingException; import org.elasticsearch.repositories.RepositoryVerificationException; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.repositories.blobstore.testkit.SnapshotRepositoryTestKit; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; @@ -546,7 +547,7 @@ public Map getRepositories( ) { return Map.of( DISRUPTABLE_REPO_TYPE, - (projectId, metadata) -> new DisruptableRepository( + (projectId, metadata, snapshotMetrics) -> new DisruptableRepository( projectId, metadata, namedXContentRegistry, @@ -572,7 +573,7 @@ static class DisruptableRepository extends BlobStoreRepository { RecoverySettings recoverySettings, BlobPath basePath ) { - super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, basePath); + super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, basePath, SnapshotMetrics.NOOP); } void setBlobStore(BlobStore blobStore) { diff --git a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java index a65716a63f6d2..8daa4a2698255 100644 --- a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java +++ b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java @@ -36,6 +36,7 @@ import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryMissingException; +import 
org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.repositories.blobstore.testkit.SnapshotRepositoryTestKit; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; @@ -188,7 +189,7 @@ public Map getRepositories( ) { return Map.of( ASSERTING_REPO_TYPE, - (projectId, metadata) -> new AssertingRepository( + (projectId, metadata, snapshotMetrics) -> new AssertingRepository( projectId, metadata, namedXContentRegistry, @@ -227,7 +228,7 @@ static class AssertingRepository extends BlobStoreRepository { RecoverySettings recoverySettings, BlobPath basePath ) { - super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, basePath); + super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, basePath, SnapshotMetrics.NOOP); } void setBlobStore(BlobStore blobStore) { diff --git a/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java b/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java index 92297f7585128..e627ca83db421 100644 --- a/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java +++ b/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java @@ -265,7 +265,7 @@ public Map getRepositories( ) { return Collections.singletonMap( "verifyaccess-fs", - (projectId, metadata) -> new AccessVerifyingRepo( + (projectId, metadata, snapshotMetrics) -> new AccessVerifyingRepo( projectId, metadata, env, From b8be99eca6623a30c21ee3915e6c411d2b3966be Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 30 Jun 2025 11:49:52 +1000 Subject: [PATCH 02/65] Fix double counted 
snapshot completion --- .../repositories/blobstore/BlobStoreRepository.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index c2c463a73fda5..0a8ca6f25d9ff 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -3488,7 +3488,6 @@ private void doSnapshotShard(SnapshotShardContext context) { ); snapshotStatus.updateStatusDescription("all files uploaded: done"); snapshotStatus.moveToDone(threadPool.absoluteTimeInMillis(), shardSnapshotResult); - blobStoreSnapshotMetrics.shardSnapshotCompleted(); context.onResponse(shardSnapshotResult); }, e -> { try { From 9eeee307a805ed57584f475cbcd6b5fb5cab924b Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 30 Jun 2025 14:08:11 +1000 Subject: [PATCH 03/65] Reduce size of change --- .../azure/AzureRepositoryPlugin.java | 35 ++++++---- ...eCloudStorageBlobStoreRepositoryTests.java | 69 ++++++++++--------- .../gcs/GoogleCloudStoragePlugin.java | 34 +++++---- .../repositories/s3/S3RepositoryPlugin.java | 29 ++++---- .../repository/url/URLRepositoryPlugin.java | 2 +- .../repositories/hdfs/HdfsPlugin.java | 2 +- .../plan/ShardSnapshotsServiceIT.java | 2 +- .../repositories/InvalidRepositoryIT.java | 2 +- ...BlobStoreRepositoryOperationPurposeIT.java | 2 +- ...etadataLoadingDuringSnapshotRestoreIT.java | 2 +- .../SnapshotsServiceDoubleFinalizationIT.java | 2 +- .../repositories/RepositoriesModule.java | 2 +- .../repositories/Repository.java | 26 ++++++- .../ReservedRepositoryActionTests.java | 3 +- .../RepositoriesServiceTests.java | 10 +-- ...bStoreRepositoryDeleteThrottlingTests.java | 2 +- .../snapshots/SnapshotResiliencyTests.java | 2 +- ...ncySimulatingBlobStoreRepositoryTests.java | 2 +- 
.../LatencySimulatingRepositoryPlugin.java | 2 +- .../snapshots/mockstore/MockRepository.java | 2 +- .../java/org/elasticsearch/xpack/ccr/Ccr.java | 2 +- .../SourceOnlySnapshotRepository.java | 2 +- .../lucene/bwc/AbstractArchiveTestCase.java | 2 +- ...chableSnapshotDiskThresholdIntegTests.java | 2 +- ...archableSnapshotsPrewarmingIntegTests.java | 2 +- ...SnapshotRecoveryStateIntegrationTests.java | 2 +- .../xpack/slm/SLMHealthBlockedSnapshotIT.java | 2 +- .../xpack/slm/SLMStatDisruptionIT.java | 4 +- .../SnapshotBasedIndexRecoveryIT.java | 6 +- .../analyze/RepositoryAnalysisFailureIT.java | 2 +- .../analyze/RepositoryAnalysisSuccessIT.java | 2 +- .../votingonly/VotingOnlyNodePluginTests.java | 2 +- 32 files changed, 153 insertions(+), 109 deletions(-) diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepositoryPlugin.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepositoryPlugin.java index 17c6ebcf18095..ed86303865fab 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepositoryPlugin.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepositoryPlugin.java @@ -10,6 +10,8 @@ package org.elasticsearch.repositories.azure; import org.apache.lucene.util.SetOnce; +import org.elasticsearch.cluster.metadata.ProjectId; +import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Setting; @@ -24,6 +26,7 @@ import org.elasticsearch.plugins.RepositoryPlugin; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.threadpool.ExecutorBuilder; import org.elasticsearch.threadpool.ScalingExecutorBuilder; import 
org.elasticsearch.threadpool.ThreadPool; @@ -62,20 +65,24 @@ public Map getRepositories( RecoverySettings recoverySettings, RepositoriesMetrics repositoriesMetrics ) { - return Collections.singletonMap(AzureRepository.TYPE, (projectId, metadata, snapshotMetrics) -> { - AzureStorageService storageService = azureStoreService.get(); - assert storageService != null; - return new AzureRepository( - projectId, - metadata, - namedXContentRegistry, - storageService, - clusterService, - bigArrays, - recoverySettings, - repositoriesMetrics, - snapshotMetrics - ); + return Collections.singletonMap(AzureRepository.TYPE, new Repository.SnapshotMetricsFactory() { + + @Override + public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { + AzureStorageService storageService = azureStoreService.get(); + assert storageService != null; + return new AzureRepository( + projectId, + metadata, + namedXContentRegistry, + storageService, + clusterService, + bigArrays, + recoverySettings, + repositoriesMetrics, + snapshotMetrics + ); + } }); } diff --git a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java index 2906bc63ffe05..efdf0b84e5525 100644 --- a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java +++ b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java @@ -22,6 +22,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.service.ClusterService; import 
org.elasticsearch.common.BackoffPolicy; @@ -45,6 +46,7 @@ import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.repositories.blobstore.ESMockAPIBasedRepositoryIntegTestCase; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -274,39 +276,42 @@ public Map getRepositories( RecoverySettings recoverySettings, RepositoriesMetrics repositoriesMetrics ) { - return Collections.singletonMap( - GoogleCloudStorageRepository.TYPE, - (projectId, metadata, snapshotMetrics) -> new GoogleCloudStorageRepository( - projectId, - metadata, - registry, - this.storageService, - clusterService, - bigArrays, - recoverySettings, - new GcsRepositoryStatsCollector(), - snapshotMetrics - ) { - @Override - protected GoogleCloudStorageBlobStore createBlobStore() { - return new GoogleCloudStorageBlobStore( - metadata.settings().get("bucket"), - "test", - metadata.name(), - storageService, - bigArrays, - randomIntBetween(1, 8) * 1024, - BackoffPolicy.noBackoff(), - this.statsCollector() - ) { - @Override - long getLargeBlobThresholdInBytes() { - return ByteSizeUnit.MB.toBytes(1); - } - }; - } + return Collections.singletonMap(GoogleCloudStorageRepository.TYPE, new Repository.SnapshotMetricsFactory() { + + @Override + public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { + return new GoogleCloudStorageRepository( + projectId, + metadata, + registry, + TestGoogleCloudStoragePlugin.this.storageService, + clusterService, + bigArrays, + recoverySettings, + new GcsRepositoryStatsCollector(), + snapshotMetrics + ) { + @Override + protected GoogleCloudStorageBlobStore createBlobStore() { + return new GoogleCloudStorageBlobStore( + metadata.settings().get("bucket"), + "test", + 
metadata.name(), + storageService, + bigArrays, + randomIntBetween(1, 8) * 1024, + BackoffPolicy.noBackoff(), + this.statsCollector() + ) { + @Override + long getLargeBlobThresholdInBytes() { + return ByteSizeUnit.MB.toBytes(1); + } + }; + } + }; } - ); + }); } } diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java index 8f606535db716..15ab1fa0d1f53 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java @@ -9,6 +9,8 @@ package org.elasticsearch.repositories.gcs; +import org.elasticsearch.cluster.metadata.ProjectId; +import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Setting; @@ -21,6 +23,7 @@ import org.elasticsearch.plugins.RepositoryPlugin; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.xcontent.NamedXContentRegistry; import java.util.Arrays; @@ -55,20 +58,23 @@ public Map getRepositories( RecoverySettings recoverySettings, RepositoriesMetrics repositoriesMetrics ) { - return Collections.singletonMap( - GoogleCloudStorageRepository.TYPE, - (projectId, metadata, snapshotMetrics) -> new GoogleCloudStorageRepository( - projectId, - metadata, - namedXContentRegistry, - this.storageService, - clusterService, - bigArrays, - recoverySettings, - new GcsRepositoryStatsCollector(clusterService.threadPool(), metadata, repositoriesMetrics), - snapshotMetrics - ) - ); + return Collections.singletonMap(GoogleCloudStorageRepository.TYPE, new 
Repository.SnapshotMetricsFactory() { + + @Override + public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { + return new GoogleCloudStorageRepository( + projectId, + metadata, + namedXContentRegistry, + GoogleCloudStoragePlugin.this.storageService, + clusterService, + bigArrays, + recoverySettings, + new GcsRepositoryStatsCollector(clusterService.threadPool(), metadata, repositoriesMetrics), + snapshotMetrics + ); + } + }); } @Override diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RepositoryPlugin.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RepositoryPlugin.java index 9b63f193d762e..131175128af44 100644 --- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RepositoryPlugin.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RepositoryPlugin.java @@ -119,19 +119,22 @@ public Map getRepositories( final RepositoriesMetrics repositoriesMetrics ) { final S3RepositoriesMetrics s3RepositoriesMetrics = new S3RepositoriesMetrics(repositoriesMetrics); - return Collections.singletonMap( - S3Repository.TYPE, - (projectId, metadata, snapshotMetrics) -> createRepository( - projectId, - metadata, - registry, - clusterService, - bigArrays, - recoverySettings, - s3RepositoriesMetrics, - snapshotMetrics - ) - ); + return Collections.singletonMap(S3Repository.TYPE, new Repository.SnapshotMetricsFactory() { + + @Override + public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { + return createRepository( + projectId, + metadata, + registry, + clusterService, + bigArrays, + recoverySettings, + s3RepositoriesMetrics, + snapshotMetrics + ); + } + }); } @Override diff --git a/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java 
b/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java index 4a2dc21d6e9f3..821b761aa663d 100644 --- a/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java +++ b/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java @@ -53,7 +53,7 @@ public Map getRepositories( ) { return Collections.singletonMap( URLRepository.TYPE, - (projectId, metadata, snapshotMetrics) -> new URLRepository( + (projectId, metadata) -> new URLRepository( projectId, metadata, env, diff --git a/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java b/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java index 34c7dbd611418..9d80a270c7242 100644 --- a/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java +++ b/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java @@ -71,7 +71,7 @@ public Map getRepositories( ) { return Collections.singletonMap( "hdfs", - (projectId, metadata, snapshotMetrics) -> new HdfsRepository( + (projectId, metadata) -> new HdfsRepository( projectId, metadata, env, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/plan/ShardSnapshotsServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/plan/ShardSnapshotsServiceIT.java index a3e8422068f2f..06e4d06fcee0e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/plan/ShardSnapshotsServiceIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/plan/ShardSnapshotsServiceIT.java @@ -80,7 +80,7 @@ public Map getRepositories( ) { return Collections.singletonMap( TYPE, - (projectId, metadata, snapshotMetrics) -> new FailingRepo( + (projectId, metadata) -> new FailingRepo( projectId, metadata, env, diff --git 
a/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java index bfcab499f1043..62967fc2d035f 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java @@ -78,7 +78,7 @@ public Map getRepositories( ) { return Collections.singletonMap( TYPE, - (projectId, metadata, snapshotMetrics) -> new UnstableRepository( + (projectId, metadata) -> new UnstableRepository( projectId, metadata, env, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryOperationPurposeIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryOperationPurposeIT.java index 1980c3c78eeb1..fb119caa25cb6 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryOperationPurposeIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryOperationPurposeIT.java @@ -101,7 +101,7 @@ public Map getRepositories( ) { return Map.of( ASSERTING_REPO_TYPE, - (projectId, metadata, snapshotMetrics) -> new AssertingRepository( + (projectId, metadata) -> new AssertingRepository( projectId, metadata, env, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java index 56a40383006a4..89a8af49efdef 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java @@ -218,7 +218,7 @@ public Map getRepositories( ) { 
return Collections.singletonMap( TYPE, - (projectId, metadata, snapshotMetrics) -> new CountingMockRepository( + (projectId, metadata) -> new CountingMockRepository( projectId, metadata, env, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotsServiceDoubleFinalizationIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotsServiceDoubleFinalizationIT.java index a0c14b2df0dc2..7c00a93fa495b 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotsServiceDoubleFinalizationIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotsServiceDoubleFinalizationIT.java @@ -213,7 +213,7 @@ public Map getRepositories( ) { return Map.of( REPO_TYPE, - (projectId, metadata, snapshotMetrics) -> new TestRepository( + (projectId, metadata) -> new TestRepository( projectId, metadata, env, diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java index c51e796d708d8..d8c1235d6ac59 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java @@ -54,7 +54,7 @@ public RepositoriesModule( Map factories = new HashMap<>(); factories.put( FsRepository.TYPE, - (projectId, metadata, snapshotMetrics) -> new FsRepository( + (projectId, metadata) -> new FsRepository( projectId, metadata, env, diff --git a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index 2ae246934d619..13403e2ce3eb8 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -63,13 +63,26 @@ interface Factory { * @param projectId the project-id for the repository or {@code null} if the repository is at the 
cluster level. * @param metadata metadata for the repository including name and settings */ - Repository create(@Nullable ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) throws Exception; + Repository create(@Nullable ProjectId projectId, RepositoryMetadata metadata) throws Exception; + + /** + * Constructs a repository. + * + * @param projectId the project-id for the repository or {@code null} if the repository is at the cluster level. + * @param metadata metadata for the repository including name and settings + * @param snapshotMetrics the singleton SnapshotMetrics instance + */ + default Repository create(@Nullable ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) + throws Exception { + return create(projectId, metadata); + } /** * Constructs a repository. * @param projectId the project-id for the repository or {@code null} if the repository is at the cluster level. * @param metadata metadata for the repository including name and settings * @param typeLookup a function that returns the repository factory for the given repository type. + * @param snapshotMetrics the singleton SnapshotMetrics instance */ default Repository create( @Nullable ProjectId projectId, @@ -81,6 +94,17 @@ default Repository create( } } + /** + * A convenience class for {@link Factory} instances that require a {@link SnapshotMetrics} instance + */ + class SnapshotMetricsFactory implements Factory { + + @Override + public Repository create(ProjectId projectId, RepositoryMetadata metadata) throws Exception { + throw new UnsupportedOperationException("This repository requires a SnapshotMetrics implementation"); + } + } + /** * Get the project-id for the repository. 
* diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java index b6e141f644c76..cee2c5fd8d41d 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java @@ -21,7 +21,6 @@ import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryException; import org.elasticsearch.repositories.RepositoryMissingException; -import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.reservedstate.TransformState; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; @@ -128,7 +127,7 @@ public void testRemoveRepo() { private RepositoriesService mockRepositoriesService() { var fsFactory = new Repository.Factory() { @Override - public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { + public Repository create(ProjectId projectId, RepositoryMetadata metadata) { var repo = mock(Repository.class); doAnswer(invocation -> metadata).when(repo).getMetadata(); return repo; diff --git a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java index 834365aa1c0b3..c127ca55ccddd 100644 --- a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java @@ -138,15 +138,15 @@ public void setUp() throws Exception { Map typesRegistry = Map.of( TestRepository.TYPE, - (projectId, metadata1, snapshotMetrics) -> new TestRepository(projectId, 
metadata1), + (projectId, metadata1) -> new TestRepository(projectId, metadata1), UnstableRepository.TYPE, - (projectId, metadata2, snapshotMetrics) -> new UnstableRepository(projectId, metadata2), + (projectId, metadata2) -> new UnstableRepository(projectId, metadata2), VerificationFailRepository.TYPE, - (projectId, metadata3, snapshotMetrics) -> new VerificationFailRepository(projectId, metadata3), + (projectId, metadata3) -> new VerificationFailRepository(projectId, metadata3), MeteredRepositoryTypeA.TYPE, - (projectId, metadata, snapshotMetrics) -> new MeteredRepositoryTypeA(projectId, metadata, clusterService), + (projectId, metadata) -> new MeteredRepositoryTypeA(projectId, metadata, clusterService), MeteredRepositoryTypeB.TYPE, - (projectId, metadata, snapshotMetrics) -> new MeteredRepositoryTypeB(projectId, metadata, clusterService) + (projectId, metadata) -> new MeteredRepositoryTypeB(projectId, metadata, clusterService) ); repositoriesService = new RepositoriesService( Settings.EMPTY, diff --git a/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryDeleteThrottlingTests.java b/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryDeleteThrottlingTests.java index b4ece269399ec..de1979cfcf5d2 100644 --- a/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryDeleteThrottlingTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryDeleteThrottlingTests.java @@ -75,7 +75,7 @@ public Map getRepositories( ) { return Collections.singletonMap( TEST_REPO_TYPE, - (projectId, metadata, snapshotMetrics) -> new FsRepository( + (projectId, metadata) -> new FsRepository( projectId, metadata, env, diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java index 169f44a2b2351..c005691b212b2 100644 --- 
a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java @@ -2397,7 +2397,7 @@ public RecyclerBytesStreamOutput newNetworkBytesStream() { clusterService, Collections.singletonMap( FsRepository.TYPE, - (projectId, metadata, snapshotMetrics) -> new FsRepository( + (projectId, metadata) -> new FsRepository( projectId, metadata, environment, diff --git a/test/external-modules/latency-simulating-directory/src/internalClusterTest/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepositoryTests.java b/test/external-modules/latency-simulating-directory/src/internalClusterTest/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepositoryTests.java index f27fca9382b1e..c553f0f10062a 100644 --- a/test/external-modules/latency-simulating-directory/src/internalClusterTest/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepositoryTests.java +++ b/test/external-modules/latency-simulating-directory/src/internalClusterTest/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepositoryTests.java @@ -65,7 +65,7 @@ public Map getRepositories( ) { return Map.of( REPO_TYPE, - (projectId, metadata, snapshotMetrics) -> new LatencySimulatingBlobStoreRepository( + (projectId, metadata) -> new LatencySimulatingBlobStoreRepository( projectId, metadata, env, diff --git a/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingRepositoryPlugin.java b/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingRepositoryPlugin.java index ac04fb4dd4f07..b60c965a66537 100644 --- a/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingRepositoryPlugin.java +++ 
b/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingRepositoryPlugin.java @@ -41,7 +41,7 @@ public Map getRepositories( ) { return Map.of( TYPE, - (projectId, metadata, snapshotMetrics) -> new LatencySimulatingBlobStoreRepository( + (projectId, metadata) -> new LatencySimulatingBlobStoreRepository( projectId, metadata, env, diff --git a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java index 26062bcb0c201..252091bb45d82 100644 --- a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java +++ b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java @@ -90,7 +90,7 @@ public Map getRepositories( ) { return Collections.singletonMap( "mock", - (projectId, metadata, snapshotMetrics) -> new MockRepository( + (projectId, metadata) -> new MockRepository( projectId, metadata, env, diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/Ccr.java b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/Ccr.java index 53a5c3cc2059d..bfc85f3efb0fe 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/Ccr.java +++ b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/Ccr.java @@ -375,7 +375,7 @@ public Map getInternalRepositories( ClusterService clusterService, RecoverySettings recoverySettings ) { - Repository.Factory repositoryFactory = (projectId, metadata, snapshotMetrics) -> new CcrRepository( + Repository.Factory repositoryFactory = (projectId, metadata) -> new CcrRepository( projectId, metadata, client, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotRepository.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotRepository.java index 21e1a0c010b4f..1ae1e5e0fe89b 
100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotRepository.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotRepository.java @@ -258,7 +258,7 @@ public static Repository.Factory newRepositoryFactory() { return new Repository.Factory() { @Override - public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { + public Repository create(ProjectId projectId, RepositoryMetadata metadata) { throw new UnsupportedOperationException(); } diff --git a/x-pack/plugin/old-lucene-versions/src/internalClusterTest/java/org/elasticsearch/xpack/lucene/bwc/AbstractArchiveTestCase.java b/x-pack/plugin/old-lucene-versions/src/internalClusterTest/java/org/elasticsearch/xpack/lucene/bwc/AbstractArchiveTestCase.java index 8c7ae5d914d29..614036d3792ca 100644 --- a/x-pack/plugin/old-lucene-versions/src/internalClusterTest/java/org/elasticsearch/xpack/lucene/bwc/AbstractArchiveTestCase.java +++ b/x-pack/plugin/old-lucene-versions/src/internalClusterTest/java/org/elasticsearch/xpack/lucene/bwc/AbstractArchiveTestCase.java @@ -67,7 +67,7 @@ public Map getRepositories( ) { return Map.of( FAKE_VERSIONS_TYPE, - (projectId, metadata, snapshotMetrics) -> new FakeVersionsRepo( + (projectId, metadata) -> new FakeVersionsRepo( projectId, metadata, env, diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java index 6fbb4f98a8236..518ff2354f498 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java +++ 
b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java @@ -381,7 +381,7 @@ public Map getRepositories( ) { return Collections.singletonMap( TYPE, - (projectId, metadata, snapshotMetrics) -> new CustomMockRepository( + (projectId, metadata) -> new CustomMockRepository( projectId, metadata, env, diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPrewarmingIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPrewarmingIntegTests.java index c6254e3d9da7f..dcf77d06e323a 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPrewarmingIntegTests.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPrewarmingIntegTests.java @@ -454,7 +454,7 @@ public Map getRepositories( ) { return Collections.singletonMap( "tracking", - (projectId, metadata, snapshotMetrics) -> new FsRepository( + (projectId, metadata) -> new FsRepository( projectId, metadata, env, diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/recovery/SearchableSnapshotRecoveryStateIntegrationTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/recovery/SearchableSnapshotRecoveryStateIntegrationTests.java index 9d95548ee38c8..e0674c0151f65 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/recovery/SearchableSnapshotRecoveryStateIntegrationTests.java +++ 
b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/recovery/SearchableSnapshotRecoveryStateIntegrationTests.java @@ -248,7 +248,7 @@ public Map getRepositories( ) { return Collections.singletonMap( "test-fs", - (projectId, metadata, snapshotMetrics) -> new FsRepository( + (projectId, metadata) -> new FsRepository( projectId, metadata, env, diff --git a/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMHealthBlockedSnapshotIT.java b/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMHealthBlockedSnapshotIT.java index 19afeb0a5aa12..219dd20f4e620 100644 --- a/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMHealthBlockedSnapshotIT.java +++ b/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMHealthBlockedSnapshotIT.java @@ -120,7 +120,7 @@ public Map getRepositories( ) { return Map.of( TestDelayedRepo.TYPE, - (projectId, metadata, snapshotMetrics) -> new TestDelayedRepo( + (projectId, metadata) -> new TestDelayedRepo( projectId, metadata, env, diff --git a/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMStatDisruptionIT.java b/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMStatDisruptionIT.java index c6e15b229813d..d3eff9eb8585e 100644 --- a/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMStatDisruptionIT.java +++ b/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMStatDisruptionIT.java @@ -138,7 +138,7 @@ public Map getRepositories( ) { return Map.of( TestDelayedRepo.TYPE, - (projectId, metadata, snapshotMetrics) -> new TestDelayedRepo( + (projectId, metadata) -> new TestDelayedRepo( projectId, metadata, env, @@ -210,7 +210,7 @@ public Map getRepositories( ) { return Map.of( TestRestartBeforeListenersRepo.TYPE, - (projectId, metadata, snapshotMetrics) -> new 
TestRestartBeforeListenersRepo( + (projectId, metadata) -> new TestRestartBeforeListenersRepo( projectId, metadata, env, diff --git a/x-pack/plugin/snapshot-based-recoveries/src/internalClusterTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/SnapshotBasedIndexRecoveryIT.java b/x-pack/plugin/snapshot-based-recoveries/src/internalClusterTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/SnapshotBasedIndexRecoveryIT.java index b3c1f237654a6..6f73efb2fa095 100644 --- a/x-pack/plugin/snapshot-based-recoveries/src/internalClusterTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/SnapshotBasedIndexRecoveryIT.java +++ b/x-pack/plugin/snapshot-based-recoveries/src/internalClusterTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/SnapshotBasedIndexRecoveryIT.java @@ -152,7 +152,7 @@ public Map getRepositories( ) { return Map.of( FAULTY_TYPE, - (projectId, metadata, snapshotMetrics) -> new FaultyRepository( + (projectId, metadata) -> new FaultyRepository( projectId, metadata, env, @@ -162,7 +162,7 @@ public Map getRepositories( recoverySettings ), INSTRUMENTED_TYPE, - (projectId, metadata, snapshotMetrics) -> new InstrumentedRepo( + (projectId, metadata) -> new InstrumentedRepo( projectId, metadata, env, @@ -172,7 +172,7 @@ public Map getRepositories( recoverySettings ), FILTER_TYPE, - (projectId, metadata, snapshotMetrics) -> new FilterFsRepository( + (projectId, metadata) -> new FilterFsRepository( projectId, metadata, env, diff --git a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java index 2d7831d13b655..a347cae9f0954 100644 --- 
a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java +++ b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java @@ -547,7 +547,7 @@ public Map getRepositories( ) { return Map.of( DISRUPTABLE_REPO_TYPE, - (projectId, metadata, snapshotMetrics) -> new DisruptableRepository( + (projectId, metadata) -> new DisruptableRepository( projectId, metadata, namedXContentRegistry, diff --git a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java index 8daa4a2698255..6837684e71611 100644 --- a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java +++ b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java @@ -189,7 +189,7 @@ public Map getRepositories( ) { return Map.of( ASSERTING_REPO_TYPE, - (projectId, metadata, snapshotMetrics) -> new AssertingRepository( + (projectId, metadata) -> new AssertingRepository( projectId, metadata, namedXContentRegistry, diff --git a/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java b/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java index e627ca83db421..92297f7585128 100644 --- 
a/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java +++ b/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java @@ -265,7 +265,7 @@ public Map getRepositories( ) { return Collections.singletonMap( "verifyaccess-fs", - (projectId, metadata, snapshotMetrics) -> new AccessVerifyingRepo( + (projectId, metadata) -> new AccessVerifyingRepo( projectId, metadata, env, From 67eb7530a4724c15b12e3f8276eb6b400294ad47 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 30 Jun 2025 14:18:09 +1000 Subject: [PATCH 04/65] Add MeterRegistry param in callers --- .../repositories/RepositoriesService.java | 12 ------------ .../reservedstate/ReservedRepositoryActionTests.java | 4 +++- .../status/TransportSnapshotsStatusActionTests.java | 4 +++- ...IndicesClusterStateServiceRandomUpdatesTests.java | 4 +++- .../repositories/RepositoriesServiceTests.java | 4 +++- .../snapshots/SnapshotResiliencyTests.java | 4 +++- 6 files changed, 15 insertions(+), 17 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java index 22d4a6b6d8e4d..d290292a30b36 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java @@ -127,18 +127,6 @@ public class RepositoriesService extends AbstractLifecycleComponent implements C private final List> preRestoreChecks; private final SnapshotMetrics snapshotMetrics; - public RepositoriesService( - Settings settings, - ClusterService clusterService, - Map typesRegistry, - Map internalTypesRegistry, - ThreadPool threadPool, - NodeClient client, - List> preRestoreChecks - ) { - this(settings, clusterService, typesRegistry, internalTypesRegistry, threadPool, 
client, preRestoreChecks, MeterRegistry.NOOP); - } - @SuppressWarnings("this-escape") public RepositoriesService( Settings settings, diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java index cee2c5fd8d41d..a6678c0098239 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java @@ -22,6 +22,7 @@ import org.elasticsearch.repositories.RepositoryException; import org.elasticsearch.repositories.RepositoryMissingException; import org.elasticsearch.reservedstate.TransformState; +import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xcontent.XContentParser; @@ -143,7 +144,8 @@ public Repository create(ProjectId projectId, RepositoryMetadata metadata) { Map.of("fs", fsFactory), threadPool, mock(NodeClient.class), - null + null, + MeterRegistry.NOOP ) ); diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusActionTests.java index adaad4eede0d6..d4acb98bfdd8b 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusActionTests.java @@ -31,6 +31,7 @@ import org.elasticsearch.tasks.CancellableTask; import org.elasticsearch.tasks.TaskCancelHelper; import org.elasticsearch.tasks.TaskCancelledException; 
+import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.transport.CapturingTransport; @@ -74,7 +75,8 @@ public void initializeComponents() throws Exception { Map.of(), threadPool, nodeClient, - List.of() + List.of(), + MeterRegistry.NOOP ); action = new TransportSnapshotsStatusAction( transportService, diff --git a/server/src/test/java/org/elasticsearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java b/server/src/test/java/org/elasticsearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java index 464d9fe092d1c..851fe469ca6a5 100644 --- a/server/src/test/java/org/elasticsearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java +++ b/server/src/test/java/org/elasticsearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java @@ -47,6 +47,7 @@ import org.elasticsearch.indices.recovery.PeerRecoveryTargetService; import org.elasticsearch.indices.recovery.SnapshotFilesProvider; import org.elasticsearch.repositories.RepositoriesService; +import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.Transport; @@ -549,7 +550,8 @@ private IndicesClusterStateService createIndicesClusterStateService( Collections.emptyMap(), threadPool, client, - List.of() + List.of(), + MeterRegistry.NOOP ); final PeerRecoveryTargetService recoveryTargetService = new PeerRecoveryTargetService( client, diff --git a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java index c127ca55ccddd..fd6c473c2ff27 100644 --- a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java +++ 
b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java @@ -53,6 +53,7 @@ import org.elasticsearch.repositories.blobstore.MeteredBlobStoreRepository; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotInfo; +import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; @@ -155,7 +156,8 @@ public void setUp() throws Exception { typesRegistry, threadPool, client, - List.of() + List.of(), + MeterRegistry.NOOP ); clusterService.start(); diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java index c005691b212b2..c2b60d248a468 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java @@ -187,6 +187,7 @@ import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.fetch.FetchPhase; import org.elasticsearch.telemetry.TelemetryProvider; +import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.telemetry.tracing.Tracer; import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.ESTestCase; @@ -2410,7 +2411,8 @@ public RecyclerBytesStreamOutput newNetworkBytesStream() { emptyMap(), threadPool, client, - List.of() + List.of(), + MeterRegistry.NOOP ); final ActionFilters actionFilters = new ActionFilters(emptySet()); snapshotsService = new SnapshotsService( From faf4e7ae0fbcb69edd3ea7ab2bb8a6643329b916 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 30 Jun 2025 14:29:41 +1000 Subject: [PATCH 05/65] Make banned implementation final --- .../main/java/org/elasticsearch/repositories/Repository.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index 13403e2ce3eb8..f61da417931a0 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -100,7 +100,7 @@ default Repository create( class SnapshotMetricsFactory implements Factory { @Override - public Repository create(ProjectId projectId, RepositoryMetadata metadata) throws Exception { + public final Repository create(ProjectId projectId, RepositoryMetadata metadata) throws Exception { throw new UnsupportedOperationException("This repository requires a SnapshotMetrics implementation"); } } From 5a33bb6b4a5bf0c05b44c8e755559f6d82bf1089 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 30 Jun 2025 14:34:38 +1000 Subject: [PATCH 06/65] Improve javadoc --- .../main/java/org/elasticsearch/repositories/Repository.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index f61da417931a0..98b02053c7554 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -375,7 +375,7 @@ static boolean assertSnapshotMetaThread() { /** * Get the current count of snapshots in progress * - * @return The current number of shard snapshots in progress + * @return The current number of shard snapshots in progress metric value, or null if this repository doesn't track that */ @Nullable default LongWithAttributes getShardSnapshotsInProgress() { From b4c926f7ad32087c9878160d7d88cc142b3626fe Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 30 Jun 2025 14:44:22 +1000 Subject: [PATCH 07/65] Fix naming --- .../org/elasticsearch/repositories/RepositoriesService.java | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java index d290292a30b36..93d6f9899c37a 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java @@ -156,7 +156,7 @@ public RepositoriesService( threadPool.relativeTimeInMillisSupplier() ); this.preRestoreChecks = preRestoreChecks; - this.snapshotMetrics = new SnapshotMetrics(meterRegistry, this::getSnapshotsInProgress); + this.snapshotMetrics = new SnapshotMetrics(meterRegistry, this::getShardSnapshotsInProgress); } /** @@ -1091,7 +1091,7 @@ public Repository createNonProjectRepository(RepositoryMetadata repositoryMetada ); } - private Collection getSnapshotsInProgress() { + private Collection getShardSnapshotsInProgress() { return repositories.values() .stream() .flatMap(repositories -> repositories.values().stream()) From d808b85a959092be01d20d3a5987713c25519f1f Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 30 Jun 2025 14:56:17 +1000 Subject: [PATCH 08/65] Fix naming, record shard duration as histogram --- .../repositories/SnapshotMetrics.java | 22 +++++++++++-------- .../blobstore/BlobStoreRepository.java | 10 ++++++--- .../blobstore/BlobStoreSnapshotMetrics.java | 7 +++--- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java index dd9e7f704312f..84dc59f8dbc06 100644 --- a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java @@ -10,6 +10,7 @@ package org.elasticsearch.repositories; import org.elasticsearch.cluster.metadata.RepositoryMetadata; +import 
org.elasticsearch.telemetry.metric.DoubleHistogram; import org.elasticsearch.telemetry.metric.LongCounter; import org.elasticsearch.telemetry.metric.LongGauge; import org.elasticsearch.telemetry.metric.LongWithAttributes; @@ -21,9 +22,10 @@ import java.util.function.Supplier; public record SnapshotMetrics( - LongCounter snapshotsStartedCounter, - LongCounter snapshotsCompletedCounter, - LongGauge snapshotsInProgressGauge, + LongCounter snapshotsShardsStartedCounter, + LongCounter snapshotsShardsCompletedCounter, + LongGauge snapshotShardsInProgressGauge, + DoubleHistogram snapshotShardsDurationHistogram, LongCounter snapshotBlobsUploadedCounter, LongCounter snapshotBytesUploadedCounter, LongCounter snapshotUploadDurationCounter, @@ -34,9 +36,10 @@ public record SnapshotMetrics( public static final SnapshotMetrics NOOP = new SnapshotMetrics(MeterRegistry.NOOP, List::of); - public static final String SNAPSHOTS_STARTED = "es.repositories.snapshots.started.total"; - public static final String SNAPSHOTS_COMPLETED = "es.repositories.snapshots.completed.total"; - public static final String SNAPSHOTS_IN_PROGRESS = "es.repositories.snapshots.current"; + public static final String SNAPSHOT_SHARDS_STARTED = "es.repositories.snapshots.shards.started.total"; + public static final String SNAPSHOT_SHARDS_COMPLETED = "es.repositories.snapshots.shards.completed.total"; + public static final String SNAPSHOT_SHARDS_IN_PROGRESS = "es.repositories.snapshots.shards.current"; + public static final String SNAPSHOT_SHARDS_DURATION = "es.repositories.snapshots.shards.duration.histogram"; public static final String SNAPSHOT_BLOBS_UPLOADED = "es.repositories.snapshots.blobs.uploaded.total"; public static final String SNAPSHOT_BYTES_UPLOADED = "es.repositories.snapshots.upload.bytes.total"; public static final String SNAPSHOT_UPLOAD_DURATION = "es.repositories.snapshots.upload.upload_time.total"; @@ -46,14 +49,15 @@ public record SnapshotMetrics( public SnapshotMetrics(MeterRegistry 
meterRegistry, Supplier> shardSnapshotsInProgressObserver) { this( - meterRegistry.registerLongCounter(SNAPSHOTS_STARTED, "shard snapshots started", "unit"), - meterRegistry.registerLongCounter(SNAPSHOTS_COMPLETED, "shard snapshots completed", "unit"), + meterRegistry.registerLongCounter(SNAPSHOT_SHARDS_STARTED, "shard snapshots started", "unit"), + meterRegistry.registerLongCounter(SNAPSHOT_SHARDS_COMPLETED, "shard snapshots completed", "unit"), meterRegistry.registerLongsGauge( - SNAPSHOTS_IN_PROGRESS, + SNAPSHOT_SHARDS_IN_PROGRESS, "shard snapshots in progress", "unit", shardSnapshotsInProgressObserver ), + meterRegistry.registerDoubleHistogram(SNAPSHOT_SHARDS_DURATION, "shard snapshots duration", "s"), meterRegistry.registerLongCounter(SNAPSHOT_BLOBS_UPLOADED, "snapshot blobs uploaded", "unit"), meterRegistry.registerLongCounter(SNAPSHOT_BYTES_UPLOADED, "snapshot bytes uploaded", "bytes"), meterRegistry.registerLongCounter(SNAPSHOT_UPLOAD_DURATION, "snapshot upload duration", "ns"), diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 0a8ca6f25d9ff..f8baf83c6b94a 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -3214,8 +3214,13 @@ public void snapshotShard(SnapshotShardContext context) { } private void doSnapshotShard(SnapshotShardContext context) { + final long startTimeInMillis = threadPool.absoluteTimeInMillis(); blobStoreSnapshotMetrics.shardSnapshotStarted(); - context.addListener(ActionListener.running(blobStoreSnapshotMetrics::shardSnapshotCompleted)); + context.addListener( + ActionListener.running( + () -> blobStoreSnapshotMetrics.shardSnapshotCompleted(threadPool.absoluteTimeInMillis() - startTimeInMillis) + ) + ); if (isReadOnly()) { context.onFailure(new 
RepositoryException(metadata.name(), "cannot snapshot shard on a readonly repository")); return; @@ -3225,7 +3230,6 @@ private void doSnapshotShard(SnapshotShardContext context) { final SnapshotId snapshotId = context.snapshotId(); final IndexShardSnapshotStatus snapshotStatus = context.status(); snapshotStatus.updateStatusDescription("snapshot task runner: setting up shard snapshot"); - final long startTime = threadPool.absoluteTimeInMillis(); try { final ShardGeneration generation = snapshotStatus.generation(); final BlobContainer shardContainer = shardContainer(context.indexId(), shardId); @@ -3347,7 +3351,7 @@ private void doSnapshotShard(SnapshotShardContext context) { snapshotStatus.updateStatusDescription("snapshot task runner: starting shard snapshot"); snapshotStatus.moveToStarted( - startTime, + startTimeInMillis, indexIncrementalFileCount, indexTotalNumberOfFiles, indexIncrementalSize, diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java index 560785a561d43..5da412e24b6dd 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java @@ -66,13 +66,14 @@ public void incrementNumberOfBlobsUploaded() { } public void shardSnapshotStarted() { - snapshotMetrics.snapshotsStartedCounter().increment(); + snapshotMetrics.snapshotsShardsStartedCounter().increment(); numberOfShardSnapshotsStarted.inc(); shardSnapshotsInProgress.inc(); } - public void shardSnapshotCompleted() { - snapshotMetrics.snapshotsCompletedCounter().increment(); + public void shardSnapshotCompleted(long durationInMillis) { + snapshotMetrics.snapshotsShardsCompletedCounter().increment(); + snapshotMetrics.snapshotShardsDurationHistogram().record(durationInMillis / 1_000f); numberOfShardSnapshotsCompleted.inc(); 
shardSnapshotsInProgress.dec(); } From fd55b355b3c80f7ca5a9b60e015aeb20d103d7c0 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 30 Jun 2025 15:08:49 +1000 Subject: [PATCH 09/65] Millis -> nanos --- .../repositories/blobstore/BlobStoreRepository.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index f8baf83c6b94a..2845e5e477c69 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -4138,9 +4138,9 @@ private void checkAborted() { }; final String partName = fileInfo.partName(i); logger.trace("[{}] Writing [{}] to [{}]", metadata.name(), partName, shardContainer.path()); - final long startNanos = threadPool.relativeTimeInMillis(); + final long startNanos = threadPool.relativeTimeInNanos(); shardContainer.writeBlob(OperationPurpose.SNAPSHOT_DATA, partName, inputStream, partBytes, false); - final long uploadTimeInNanos = threadPool.relativeTimeInMillis() - startNanos; + final long uploadTimeInNanos = threadPool.relativeTimeInNanos() - startNanos; blobStoreSnapshotMetrics.incrementCountersForPartUpload(partBytes, uploadTimeInNanos); logger.trace( "[{}] Writing [{}] of size [{}b] to [{}] took [{}ms]", From ffdb941a645ff1ad825d0a7c5ff71a92f16eed85 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 30 Jun 2025 15:22:04 +1000 Subject: [PATCH 10/65] Reuse totalTime --- .../index/snapshots/IndexShardSnapshotStatus.java | 4 ++++ .../repositories/blobstore/BlobStoreRepository.java | 10 +++------- .../blobstore/BlobStoreSnapshotMetrics.java | 4 +++- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/snapshots/IndexShardSnapshotStatus.java 
b/server/src/main/java/org/elasticsearch/index/snapshots/IndexShardSnapshotStatus.java index 6aa6a5e498789..d9b662b573ec2 100644 --- a/server/src/main/java/org/elasticsearch/index/snapshots/IndexShardSnapshotStatus.java +++ b/server/src/main/java/org/elasticsearch/index/snapshots/IndexShardSnapshotStatus.java @@ -191,6 +191,10 @@ public Stage getStage() { return stage.get(); } + public long getTotalTime() { + return totalTime; + } + public void addAbortListener(ActionListener listener) { abortListeners.addListener(listener); } diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 2845e5e477c69..9529eca433e0c 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -3214,13 +3214,8 @@ public void snapshotShard(SnapshotShardContext context) { } private void doSnapshotShard(SnapshotShardContext context) { - final long startTimeInMillis = threadPool.absoluteTimeInMillis(); blobStoreSnapshotMetrics.shardSnapshotStarted(); - context.addListener( - ActionListener.running( - () -> blobStoreSnapshotMetrics.shardSnapshotCompleted(threadPool.absoluteTimeInMillis() - startTimeInMillis) - ) - ); + context.addListener(ActionListener.running(() -> blobStoreSnapshotMetrics.shardSnapshotCompleted(context.status().getTotalTime()))); if (isReadOnly()) { context.onFailure(new RepositoryException(metadata.name(), "cannot snapshot shard on a readonly repository")); return; @@ -3230,6 +3225,7 @@ private void doSnapshotShard(SnapshotShardContext context) { final SnapshotId snapshotId = context.snapshotId(); final IndexShardSnapshotStatus snapshotStatus = context.status(); snapshotStatus.updateStatusDescription("snapshot task runner: setting up shard snapshot"); + final long startTime = 
threadPool.absoluteTimeInMillis(); try { final ShardGeneration generation = snapshotStatus.generation(); final BlobContainer shardContainer = shardContainer(context.indexId(), shardId); @@ -3351,7 +3347,7 @@ private void doSnapshotShard(SnapshotShardContext context) { snapshotStatus.updateStatusDescription("snapshot task runner: starting shard snapshot"); snapshotStatus.moveToStarted( - startTimeInMillis, + startTime, indexIncrementalFileCount, indexTotalNumberOfFiles, indexIncrementalSize, diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java index 5da412e24b6dd..f43dcf2810466 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java @@ -73,7 +73,9 @@ public void shardSnapshotStarted() { public void shardSnapshotCompleted(long durationInMillis) { snapshotMetrics.snapshotsShardsCompletedCounter().increment(); - snapshotMetrics.snapshotShardsDurationHistogram().record(durationInMillis / 1_000f); + if (durationInMillis > 0) { + snapshotMetrics.snapshotShardsDurationHistogram().record(durationInMillis / 1_000f); + } numberOfShardSnapshotsCompleted.inc(); shardSnapshotsInProgress.dec(); } From 6e22dc6aee6cc888f88b39f4174a67cf8f9a1920 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 16 May 2025 01:59:49 +1000 Subject: [PATCH 11/65] Don't use cached time --- .../repositories/blobstore/BlobStoreRepository.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 9529eca433e0c..dd90d1ecd08dd 100644 --- 
a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -4113,18 +4113,18 @@ protected void snapshotFile(SnapshotShardContext context, FileInfo fileInfo) thr @Override public int read() throws IOException { checkAborted(); - final long beforeReadNanos = threadPool.relativeTimeInNanos(); + final long beforeReadNanos = System.nanoTime(); int value = super.read(); - blobStoreSnapshotMetrics.incrementUploadReadTime(threadPool().relativeTimeInNanos() - beforeReadNanos); + blobStoreSnapshotMetrics.incrementUploadReadTime(System.nanoTime() - beforeReadNanos); return value; } @Override public int read(byte[] b, int off, int len) throws IOException { checkAborted(); - final long beforeReadNanos = threadPool.relativeTimeInNanos(); + final long beforeReadNanos = System.nanoTime(); int amountRead = super.read(b, off, len); - blobStoreSnapshotMetrics.incrementUploadReadTime(threadPool().relativeTimeInNanos() - beforeReadNanos); + blobStoreSnapshotMetrics.incrementUploadReadTime(System.nanoTime() - beforeReadNanos); return amountRead; } @@ -4134,9 +4134,9 @@ private void checkAborted() { }; final String partName = fileInfo.partName(i); logger.trace("[{}] Writing [{}] to [{}]", metadata.name(), partName, shardContainer.path()); - final long startNanos = threadPool.relativeTimeInNanos(); + final long startNanos = System.nanoTime(); shardContainer.writeBlob(OperationPurpose.SNAPSHOT_DATA, partName, inputStream, partBytes, false); - final long uploadTimeInNanos = threadPool.relativeTimeInNanos() - startNanos; + final long uploadTimeInNanos = System.nanoTime() - startNanos; blobStoreSnapshotMetrics.incrementCountersForPartUpload(partBytes, uploadTimeInNanos); logger.trace( "[{}] Writing [{}] of size [{}b] to [{}] took [{}ms]", From 818c259452f1ca50b8204ff5a72005625b25e77d Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 1 Jul 2025 17:37:48 +1000 
Subject: [PATCH 12/65] First pass on tests --- .../repositories/InvalidRepositoryIT.java | 11 +- .../repositories/SnapshotMetricsIT.java | 120 ++++++++++++++++++ ...etadataLoadingDuringSnapshotRestoreIT.java | 12 +- .../repositories/RepositoriesModule.java | 30 +++-- .../repositories/fs/FsRepository.java | 27 ++-- .../snapshots/mockstore/MockRepository.java | 35 +++-- ...chableSnapshotDiskThresholdIntegTests.java | 12 +- 7 files changed, 208 insertions(+), 39 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java index 62967fc2d035f..cc0dc44e10d9f 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java @@ -59,7 +59,16 @@ public UnstableRepository( BigArrays bigArrays, RecoverySettings recoverySettings ) { - super(projectId, metadata, environment, namedXContentRegistry, clusterService, bigArrays, recoverySettings); + super( + projectId, + metadata, + environment, + namedXContentRegistry, + clusterService, + bigArrays, + recoverySettings, + SnapshotMetrics.NOOP + ); List unstableNodes = UNSTABLE_NODES.get(metadata.settings()); if (unstableNodes.contains(clusterService.getNodeName())) { throw new RepositoryException(metadata.name(), "Failed to create repository: current node is not stable"); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java new file mode 100644 index 0000000000000..3d06299fdfdf8 --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -0,0 +1,120 @@
+/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.repositories; + +import org.elasticsearch.common.util.CollectionUtils; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.PluginsService; +import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; +import org.elasticsearch.snapshots.mockstore.MockRepository; +import org.elasticsearch.telemetry.Measurement; +import org.elasticsearch.telemetry.TestTelemetryPlugin; +import org.hamcrest.Matcher; + +import java.util.Collection; +import java.util.List; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.everyItem; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.lessThan; + +public class SnapshotMetricsIT extends AbstractSnapshotIntegTestCase { + + @Override + protected Collection> nodePlugins() { + return CollectionUtils.appendToCopy(super.nodePlugins(), TestTelemetryPlugin.class); + } + + public void testUpdateRepository() throws Exception { + final String repositoryName = randomIdentifier(); + + createRepository(repositoryName, "mock"); + + final String indexName = randomIdentifier(); + final int numShards = randomIntBetween(1, 10); + final int numReplicas = randomIntBetween(0, 1); + createIndex(indexName, numShards, numReplicas); + + indexRandom(true, indexName, randomIntBetween(100, 300)); + + // Block the 
snapshot to test "snapshot shards in progress" + MockRepository repository = asInstanceOf(MockRepository.class, getRepositoryOnMaster(repositoryName)); + repository.blockOnDataFiles(); + final String snapshotName = randomIdentifier(); + final long beforeCreateSnapshotNanos = System.nanoTime(); + try { + clusterAdmin().prepareCreateSnapshot(TEST_REQUEST_TIMEOUT, repositoryName, snapshotName) + .setIndices(indexName) + .setWaitForCompletion(false) + .get(); + + waitForBlockOnAnyDataNode(repositoryName); + collectMetrics(); + assertShardsInProgressMetricIs(hasItem(greaterThan(0L))); + } finally { + repository.unblock(); + } + + // wait for snapshot to finish to test the other metrics + awaitNumberOfSnapshotsInProgress(0); + final long snapshotElapsedTime = System.nanoTime() - beforeCreateSnapshotNanos; + collectMetrics(); + + // sanity check blobs and bytes metrics + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_BLOBS_UPLOADED), greaterThan(0L)); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_BYTES_UPLOADED), greaterThan(0L)); + + // sanity check duration values + final long upperBoundTimeSpentOnSnapshotThings = internalCluster().numDataNodes() * snapshotElapsedTime; + assertThat( + getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_UPLOAD_DURATION), + allOf(greaterThan(0L), lessThan(upperBoundTimeSpentOnSnapshotThings)) + ); + assertThat( + getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_UPLOAD_READ_DURATION), + allOf(greaterThan(0L), lessThan(upperBoundTimeSpentOnSnapshotThings)) + ); + + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_SHARDS_STARTED), equalTo((long) numShards)); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_SHARDS_COMPLETED), equalTo((long) numShards)); + + assertShardsInProgressMetricIs(everyItem(equalTo(0L))); + } + + private static void assertShardsInProgressMetricIs(Matcher> matcher) { + final List values = 
allTestTelemetryPlugins().map(testTelemetryPlugin -> { + final List longGaugeMeasurement = testTelemetryPlugin.getLongGaugeMeasurement( + SnapshotMetrics.SNAPSHOT_SHARDS_IN_PROGRESS + ); + return longGaugeMeasurement.getLast().getLong(); + }).toList(); + assertThat(values, matcher); + } + + private static void collectMetrics() { + allTestTelemetryPlugins().forEach(TestTelemetryPlugin::collect); + } + + private long getTotalClusterLongCounterValue(String metricName) { + return allTestTelemetryPlugins().flatMap(testTelemetryPlugin -> testTelemetryPlugin.getLongCounterMeasurement(metricName).stream()) + .mapToLong(Measurement::getLong) + .sum(); + } + + private static Stream allTestTelemetryPlugins() { + return StreamSupport.stream(internalCluster().getDataNodeInstances(PluginsService.class).spliterator(), false) + .flatMap(pluginsService -> pluginsService.filterPlugins(TestTelemetryPlugin.class)); + } +} diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java index 89a8af49efdef..73f5c1c0b8a62 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java @@ -26,6 +26,7 @@ import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryData; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.snapshots.mockstore.MockRepository; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -185,7 +186,16 @@ public CountingMockRepository( BigArrays bigArrays, RecoverySettings recoverySettings ) { - super(projectId, metadata, environment, namedXContentRegistry, 
clusterService, bigArrays, recoverySettings); + super( + projectId, + metadata, + environment, + namedXContentRegistry, + clusterService, + bigArrays, + recoverySettings, + SnapshotMetrics.NOOP + ); } @Override diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java index d8c1235d6ac59..a56c7036c2f1c 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java @@ -10,6 +10,8 @@ package org.elasticsearch.repositories; import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.cluster.metadata.ProjectId; +import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.BigArrays; @@ -52,18 +54,22 @@ public RepositoriesModule( ) { final RepositoriesMetrics repositoriesMetrics = new RepositoriesMetrics(telemetryProvider.getMeterRegistry()); Map factories = new HashMap<>(); - factories.put( - FsRepository.TYPE, - (projectId, metadata) -> new FsRepository( - projectId, - metadata, - env, - namedXContentRegistry, - clusterService, - bigArrays, - recoverySettings - ) - ); + factories.put(FsRepository.TYPE, new Repository.SnapshotMetricsFactory() { + + @Override + public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { + return new FsRepository( + projectId, + metadata, + env, + namedXContentRegistry, + clusterService, + bigArrays, + recoverySettings, + snapshotMetrics + ); + } + }); for (RepositoryPlugin repoPlugin : repoPlugins) { Map newRepoTypes = repoPlugin.getRepositories( diff --git a/server/src/main/java/org/elasticsearch/repositories/fs/FsRepository.java b/server/src/main/java/org/elasticsearch/repositories/fs/FsRepository.java 
index 38b6a3a93d728..7db886dd6bb2f 100644 --- a/server/src/main/java/org/elasticsearch/repositories/fs/FsRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/fs/FsRepository.java @@ -85,16 +85,23 @@ public FsRepository( BigArrays bigArrays, RecoverySettings recoverySettings ) { - super( - projectId, - metadata, - namedXContentRegistry, - clusterService, - bigArrays, - recoverySettings, - BlobPath.EMPTY, - SnapshotMetrics.NOOP - ); + this(projectId, metadata, environment, namedXContentRegistry, clusterService, bigArrays, recoverySettings, SnapshotMetrics.NOOP); + } + + /** + * Constructs a shared file system repository. + */ + public FsRepository( + ProjectId projectId, + RepositoryMetadata metadata, + Environment environment, + NamedXContentRegistry namedXContentRegistry, + ClusterService clusterService, + BigArrays bigArrays, + RecoverySettings recoverySettings, + SnapshotMetrics snapshotMetrics + ) { + super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, BlobPath.EMPTY, snapshotMetrics); this.environment = environment; String location = REPOSITORIES_LOCATION_SETTING.get(metadata.settings()); if (location.isEmpty()) { diff --git a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java index 252091bb45d82..7a42de93ff1de 100644 --- a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java +++ b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java @@ -40,6 +40,7 @@ import org.elasticsearch.plugins.RepositoryPlugin; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.repositories.fs.FsRepository; import 
org.elasticsearch.xcontent.NamedXContentRegistry; @@ -88,18 +89,22 @@ public Map getRepositories( RecoverySettings recoverySettings, RepositoriesMetrics repositoriesMetrics ) { - return Collections.singletonMap( - "mock", - (projectId, metadata) -> new MockRepository( - projectId, - metadata, - env, - namedXContentRegistry, - clusterService, - bigArrays, - recoverySettings - ) - ); + return Collections.singletonMap("mock", new SnapshotMetricsFactory() { + + @Override + public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { + return new MockRepository( + projectId, + metadata, + env, + namedXContentRegistry, + clusterService, + bigArrays, + recoverySettings, + snapshotMetrics + ); + } + }); } @Override @@ -191,7 +196,8 @@ public MockRepository( NamedXContentRegistry namedXContentRegistry, ClusterService clusterService, BigArrays bigArrays, - RecoverySettings recoverySettings + RecoverySettings recoverySettings, + SnapshotMetrics snapshotMetrics ) { super( projectId, @@ -200,7 +206,8 @@ public MockRepository( namedXContentRegistry, clusterService, bigArrays, - recoverySettings + recoverySettings, + snapshotMetrics ); randomControlIOExceptionRate = metadata.settings().getAsDouble("random_control_io_exception_rate", 0.0); randomDataFileIOExceptionRate = metadata.settings().getAsDouble("random_data_file_io_exception_rate", 0.0); diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java index 518ff2354f498..237e2387d0a4f 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java +++ 
b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java @@ -39,6 +39,7 @@ import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.fs.FsRepository; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotState; @@ -407,7 +408,16 @@ public CustomMockRepository( BigArrays bigArrays, RecoverySettings recoverySettings ) { - super(projectId, metadata, environment, namedXContentRegistry, clusterService, bigArrays, recoverySettings); + super( + projectId, + metadata, + environment, + namedXContentRegistry, + clusterService, + bigArrays, + recoverySettings, + SnapshotMetrics.NOOP + ); } private void unlockRestore() { From 9ba0d4a06bf7224798a36fe3f38156b8eb115040 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 1 Jul 2025 21:32:22 +1000 Subject: [PATCH 13/65] Fix SnapshotMetricsIT --- .../org/elasticsearch/repositories/SnapshotMetricsIT.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index 3d06299fdfdf8..b275c750cf344 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -13,7 +13,6 @@ import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.PluginsService; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; -import org.elasticsearch.snapshots.mockstore.MockRepository; import org.elasticsearch.telemetry.Measurement; import 
org.elasticsearch.telemetry.TestTelemetryPlugin; import org.hamcrest.Matcher; @@ -37,7 +36,7 @@ protected Collection> nodePlugins() { return CollectionUtils.appendToCopy(super.nodePlugins(), TestTelemetryPlugin.class); } - public void testUpdateRepository() throws Exception { + public void testSnapshotAPMMetrics() throws Exception { final String repositoryName = randomIdentifier(); createRepository(repositoryName, "mock"); @@ -50,8 +49,7 @@ public void testUpdateRepository() throws Exception { indexRandom(true, indexName, randomIntBetween(100, 300)); // Block the snapshot to test "snapshot shards in progress" - MockRepository repository = asInstanceOf(MockRepository.class, getRepositoryOnMaster(repositoryName)); - repository.blockOnDataFiles(); + blockAllDataNodes(repositoryName); final String snapshotName = randomIdentifier(); final long beforeCreateSnapshotNanos = System.nanoTime(); try { @@ -64,7 +62,7 @@ public void testUpdateRepository() throws Exception { collectMetrics(); assertShardsInProgressMetricIs(hasItem(greaterThan(0L))); } finally { - repository.unblock(); + unblockAllDataNodes(repositoryName); } // wait for snapshot to finish to test the other metrics From 45909db829dc7a8a3efce6a12b341df27b2622e3 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 1 Jul 2025 21:32:58 +1000 Subject: [PATCH 14/65] Naming --- .../org/elasticsearch/repositories/SnapshotMetricsIT.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index b275c750cf344..4636191ea375e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -67,7 +67,7 @@ public void testSnapshotAPMMetrics() throws Exception { // wait for snapshot to finish 
to test the other metrics awaitNumberOfSnapshotsInProgress(0); - final long snapshotElapsedTime = System.nanoTime() - beforeCreateSnapshotNanos; + final long snapshotElapsedTimeNanos = System.nanoTime() - beforeCreateSnapshotNanos; collectMetrics(); // sanity check blobs and bytes metrics @@ -75,14 +75,14 @@ public void testSnapshotAPMMetrics() throws Exception { assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_BYTES_UPLOADED), greaterThan(0L)); // sanity check duration values - final long upperBoundTimeSpentOnSnapshotThings = internalCluster().numDataNodes() * snapshotElapsedTime; + final long upperBoundTimeSpentOnSnapshotThingsNanos = internalCluster().numDataNodes() * snapshotElapsedTimeNanos; assertThat( getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_UPLOAD_DURATION), - allOf(greaterThan(0L), lessThan(upperBoundTimeSpentOnSnapshotThings)) + allOf(greaterThan(0L), lessThan(upperBoundTimeSpentOnSnapshotThingsNanos)) ); assertThat( getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_UPLOAD_READ_DURATION), - allOf(greaterThan(0L), lessThan(upperBoundTimeSpentOnSnapshotThings)) + allOf(greaterThan(0L), lessThan(upperBoundTimeSpentOnSnapshotThingsNanos)) ); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_SHARDS_STARTED), equalTo((long) numShards)); From f42f9bd79cbd8f96d676f367386b295e7676d3f6 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Wed, 2 Jul 2025 15:48:04 +1000 Subject: [PATCH 15/65] Assert on throttling metrics --- .../repositories/SnapshotMetricsIT.java | 45 ++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index 4636191ea375e..fbb8f6d7a5011 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ 
b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -9,6 +9,10 @@ package org.elasticsearch.repositories; +import org.elasticsearch.action.admin.indices.stats.IndexStats; +import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; +import org.elasticsearch.action.admin.indices.stats.ShardStats; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.PluginsService; @@ -37,10 +41,6 @@ protected Collection> nodePlugins() { } public void testSnapshotAPMMetrics() throws Exception { - final String repositoryName = randomIdentifier(); - - createRepository(repositoryName, "mock"); - final String indexName = randomIdentifier(); final int numShards = randomIntBetween(1, 10); final int numReplicas = randomIntBetween(0, 1); @@ -48,6 +48,27 @@ public void testSnapshotAPMMetrics() throws Exception { indexRandom(true, indexName, randomIntBetween(100, 300)); + IndicesStatsResponse indicesStats = indicesAdmin().prepareStats(indexName).get(); + IndexStats indexStats = indicesStats.getIndex(indexName); + long totalSizeInBytes = 0; + for (ShardStats shard : indexStats.getShards()) { + totalSizeInBytes += shard.getStats().getStore().sizeInBytes(); + } + logger.info("--> total shards size: {} bytes", totalSizeInBytes); + + final String repositoryName = randomIdentifier(); + + // we want to ensure some throttling, but not too much that it slows the test down. 5 seemed a reasonable multiple to ensure that. 
+ int shardSizeMultipleToEnsureThrottling = 5; + createRepository( + repositoryName, + "mock", + randomRepositorySettings().put( + "max_snapshot_bytes_per_sec", + ByteSizeValue.ofBytes(totalSizeInBytes * shardSizeMultipleToEnsureThrottling) + ).put("max_restore_bytes_per_sec", ByteSizeValue.ofBytes(totalSizeInBytes * shardSizeMultipleToEnsureThrottling)) + ); + // Block the snapshot to test "snapshot shards in progress" blockAllDataNodes(repositoryName); final String snapshotName = randomIdentifier(); @@ -70,9 +91,11 @@ public void testSnapshotAPMMetrics() throws Exception { final long snapshotElapsedTimeNanos = System.nanoTime() - beforeCreateSnapshotNanos; collectMetrics(); - // sanity check blobs and bytes metrics + // sanity check blobs, bytes and throttling metrics assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_BLOBS_UPLOADED), greaterThan(0L)); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_BYTES_UPLOADED), greaterThan(0L)); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_CREATE_THROTTLE_DURATION), greaterThan(0L)); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_RESTORE_THROTTLE_DURATION), equalTo(0L)); // sanity check duration values final long upperBoundTimeSpentOnSnapshotThingsNanos = internalCluster().numDataNodes() * snapshotElapsedTimeNanos; @@ -89,6 +112,18 @@ public void testSnapshotAPMMetrics() throws Exception { assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_SHARDS_COMPLETED), equalTo((long) numShards)); assertShardsInProgressMetricIs(everyItem(equalTo(0L))); + + // Restore the snapshot + clusterAdmin().prepareRestoreSnapshot(TEST_REQUEST_TIMEOUT, repositoryName, snapshotName) + .setIndices(indexName) + .setWaitForCompletion(true) + .setRenamePattern("(.+)") + .setRenameReplacement("restored-$1") + .get(); + collectMetrics(); + + // assert we throttled on restore + 
assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_RESTORE_THROTTLE_DURATION), greaterThan(0L)); } private static void assertShardsInProgressMetricIs(Matcher> matcher) { From 345cc593ebe024c4a3a13eb32a7d0f5111ac1ce7 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 3 Jul 2025 11:47:10 +1000 Subject: [PATCH 16/65] Add snapshot APM metrics --- .../repositories/SnapshotMetricsIT.java | 41 ++++++++++++++++++- .../repositories/RepositoriesService.java | 4 ++ .../repositories/SnapshotMetrics.java | 9 ++++ .../snapshots/SnapshotsService.java | 8 ++++ 4 files changed, 60 insertions(+), 2 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index fbb8f6d7a5011..e5c8eeff9dafd 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -14,11 +14,13 @@ import org.elasticsearch.action.admin.indices.stats.ShardStats; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.CollectionUtils; +import org.elasticsearch.core.TimeValue; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.PluginsService; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; import org.elasticsearch.telemetry.Measurement; import org.elasticsearch.telemetry.TestTelemetryPlugin; +import org.elasticsearch.threadpool.ThreadPool; import org.hamcrest.Matcher; import java.util.Collection; @@ -31,6 +33,7 @@ import static org.hamcrest.Matchers.everyItem; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.hasItem; +import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.lessThan; public class SnapshotMetricsIT extends AbstractSnapshotIntegTestCase { @@ -82,6 +85,10 @@ public 
void testSnapshotAPMMetrics() throws Exception { waitForBlockOnAnyDataNode(repositoryName); collectMetrics(); assertShardsInProgressMetricIs(hasItem(greaterThan(0L))); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_STARTED), equalTo(1L)); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_COMPLETED), equalTo(0L)); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_SHARDS_STARTED), greaterThan(0L)); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_SHARDS_COMPLETED), equalTo(0L)); } finally { unblockAllDataNodes(repositoryName); } @@ -96,9 +103,31 @@ public void testSnapshotAPMMetrics() throws Exception { assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_BYTES_UPLOADED), greaterThan(0L)); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_CREATE_THROTTLE_DURATION), greaterThan(0L)); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_RESTORE_THROTTLE_DURATION), equalTo(0L)); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_STARTED), equalTo(1L)); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_COMPLETED), equalTo(1L)); + + // Sanity check shard duration observations + assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_SHARDS_DURATION, hasSize(numShards)); + assertDoubleHistogramMetrics( + SnapshotMetrics.SNAPSHOT_SHARDS_DURATION, + everyItem(lessThan(TimeValue.timeValueNanos(snapshotElapsedTimeNanos).secondsFrac())) + ); + + // Sanity check snapshot observations + assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_DURATION, hasSize(1)); + assertDoubleHistogramMetrics( + SnapshotMetrics.SNAPSHOT_DURATION, + everyItem(lessThan(TimeValue.timeValueNanos(snapshotElapsedTimeNanos).secondsFrac())) + ); + + // Work out the maximum amount of concurrency per node + final ThreadPool tp = internalCluster().getDataNodeInstance(ThreadPool.class); + int snapshotThreadPoolSize = 
tp.info(ThreadPool.Names.SNAPSHOT).getMax(); + int maximumPerNodeConcurrency = Math.max(snapshotThreadPoolSize, numShards); // sanity check duration values - final long upperBoundTimeSpentOnSnapshotThingsNanos = internalCluster().numDataNodes() * snapshotElapsedTimeNanos; + final long upperBoundTimeSpentOnSnapshotThingsNanos = internalCluster().numDataNodes() * maximumPerNodeConcurrency + * snapshotElapsedTimeNanos; assertThat( getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_UPLOAD_DURATION), allOf(greaterThan(0L), lessThan(upperBoundTimeSpentOnSnapshotThingsNanos)) @@ -126,6 +155,14 @@ public void testSnapshotAPMMetrics() throws Exception { assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_RESTORE_THROTTLE_DURATION), greaterThan(0L)); } + private static void assertDoubleHistogramMetrics(String metricName, Matcher> matcher) { + final List values = allTestTelemetryPlugins().flatMap(testTelemetryPlugin -> { + final List doubleHistogramMeasurement = testTelemetryPlugin.getDoubleHistogramMeasurement(metricName); + return doubleHistogramMeasurement.stream().map(Measurement::getDouble); + }).toList(); + assertThat(values, matcher); + } + private static void assertShardsInProgressMetricIs(Matcher> matcher) { final List values = allTestTelemetryPlugins().map(testTelemetryPlugin -> { final List longGaugeMeasurement = testTelemetryPlugin.getLongGaugeMeasurement( @@ -147,7 +184,7 @@ private long getTotalClusterLongCounterValue(String metricName) { } private static Stream allTestTelemetryPlugins() { - return StreamSupport.stream(internalCluster().getDataNodeInstances(PluginsService.class).spliterator(), false) + return StreamSupport.stream(internalCluster().getDataOrMasterNodeInstances(PluginsService.class).spliterator(), false) .flatMap(pluginsService -> pluginsService.filterPlugins(TestTelemetryPlugin.class)); } } diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java 
b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java index 93d6f9899c37a..82b4fab1d4d3e 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java @@ -1269,6 +1269,10 @@ public RepositoryUsageStats getUsageStats() { ); } + public SnapshotMetrics getSnapshotMetrics() { + return snapshotMetrics; + } + @Override protected void doStart() {} diff --git a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java index 84dc59f8dbc06..a22c2d72e4a20 100644 --- a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java @@ -22,6 +22,9 @@ import java.util.function.Supplier; public record SnapshotMetrics( + LongCounter snapshotsStartedCounter, + LongCounter snapshotsCompletedCounter, + DoubleHistogram snapshotsDurationHistogram, LongCounter snapshotsShardsStartedCounter, LongCounter snapshotsShardsCompletedCounter, LongGauge snapshotShardsInProgressGauge, @@ -36,6 +39,9 @@ public record SnapshotMetrics( public static final SnapshotMetrics NOOP = new SnapshotMetrics(MeterRegistry.NOOP, List::of); + public static final String SNAPSHOT_STARTED = "es.repositories.snapshots.started.total"; + public static final String SNAPSHOT_COMPLETED = "es.repositories.snapshots.completed.total"; + public static final String SNAPSHOT_DURATION = "es.repositories.snapshots.duration.histogram"; public static final String SNAPSHOT_SHARDS_STARTED = "es.repositories.snapshots.shards.started.total"; public static final String SNAPSHOT_SHARDS_COMPLETED = "es.repositories.snapshots.shards.completed.total"; public static final String SNAPSHOT_SHARDS_IN_PROGRESS = "es.repositories.snapshots.shards.current"; @@ -49,6 +55,9 @@ public record SnapshotMetrics( public 
SnapshotMetrics(MeterRegistry meterRegistry, Supplier> shardSnapshotsInProgressObserver) { this( + meterRegistry.registerLongCounter(SNAPSHOT_STARTED, "snapshots started", "unit"), + meterRegistry.registerLongCounter(SNAPSHOT_COMPLETED, "snapshots completed", "unit"), + meterRegistry.registerDoubleHistogram(SNAPSHOT_DURATION, "snapshots duration", "s"), meterRegistry.registerLongCounter(SNAPSHOT_SHARDS_STARTED, "shard snapshots started", "unit"), meterRegistry.registerLongCounter(SNAPSHOT_SHARDS_COMPLETED, "shard snapshots completed", "unit"), meterRegistry.registerLongsGauge( diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 8a1f1ad79a17f..70291fcd050b3 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -101,6 +101,7 @@ import org.elasticsearch.repositories.ShardGeneration; import org.elasticsearch.repositories.ShardGenerations; import org.elasticsearch.repositories.ShardSnapshotResult; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.tasks.Task; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; @@ -1596,6 +1597,11 @@ protected void doRun() { @Override public void onResponse(List> actionListeners) { completeListenersIgnoringException(actionListeners, snapshotInfo); + final Map attributes = SnapshotMetrics.createAttributesMap(repo.getMetadata()); + final SnapshotMetrics snapshotMetrics = repositoriesService.getSnapshotMetrics(); + snapshotMetrics.snapshotsCompletedCounter().incrementBy(1, attributes); + snapshotMetrics.snapshotsDurationHistogram() + .record((snapshotInfo.endTime() - snapshotInfo.startTime()) / 1_000.0, attributes); logger.info("snapshot [{}] completed with state [{}]", snapshot, snapshotInfo.state()); } @@ -4323,6 +4329,8 @@ private 
SnapshotsInProgress createSnapshot( final var res = snapshotsInProgress.withAddedEntry(newEntry); taskContext.success(() -> { logger.info("snapshot [{}] started", snapshot); + final Map attributes = SnapshotMetrics.createAttributesMap(repository.getMetadata()); + repositoriesService.getSnapshotMetrics().snapshotsStartedCounter().incrementBy(1, attributes); createSnapshotTask.listener.onResponse(snapshot); if (newEntry.state().completed()) { endSnapshot(newEntry, currentState.metadata(), createSnapshotTask.repositoryData); From b341645385b68e692d6b7a6858c9d113530a4e4d Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 3 Jul 2025 16:09:03 +1000 Subject: [PATCH 17/65] Add snapshot metrics --- .../repositories/SnapshotMetricsIT.java | 41 ++++++++++++++--- .../elasticsearch/node/NodeConstruction.java | 8 +++- .../repositories/RepositoriesModule.java | 5 +- .../repositories/RepositoriesService.java | 10 ++-- .../repositories/SnapshotMetrics.java | 46 +++++++++++-------- .../blobstore/BlobStoreRepository.java | 4 +- .../blobstore/BlobStoreSnapshotMetrics.java | 12 +++-- .../snapshots/SnapshotsService.java | 39 ++++++++++++++-- .../ReservedRepositoryActionTests.java | 4 +- .../TransportSnapshotsStatusActionTests.java | 4 +- ...ClusterStateServiceRandomUpdatesTests.java | 4 +- .../repositories/RepositoriesModuleTests.java | 12 +++-- .../RepositoriesServiceTests.java | 3 +- .../snapshots/SnapshotResiliencyTests.java | 7 +-- 14 files changed, 136 insertions(+), 63 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index e5c8eeff9dafd..466ab410fadf6 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -12,6 +12,7 @@ import 
org.elasticsearch.action.admin.indices.stats.IndexStats; import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; import org.elasticsearch.action.admin.indices.stats.ShardStats; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.core.TimeValue; @@ -20,6 +21,7 @@ import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; import org.elasticsearch.telemetry.Measurement; import org.elasticsearch.telemetry.TestTelemetryPlugin; +import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.threadpool.ThreadPool; import org.hamcrest.Matcher; @@ -28,6 +30,7 @@ import java.util.stream.Stream; import java.util.stream.StreamSupport; +import static org.elasticsearch.threadpool.ThreadPool.ESTIMATED_TIME_INTERVAL_SETTING; import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.everyItem; @@ -36,6 +39,7 @@ import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.lessThan; +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST) public class SnapshotMetricsIT extends AbstractSnapshotIntegTestCase { @Override @@ -43,6 +47,15 @@ protected Collection> nodePlugins() { return CollectionUtils.appendToCopy(super.nodePlugins(), TestTelemetryPlugin.class); } + @Override + protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal, otherSettings)) + // Make sanity checking duration histograms possible + .put(ESTIMATED_TIME_INTERVAL_SETTING.getKey(), "0s") + .build(); + } + public void testSnapshotAPMMetrics() throws Exception { final String indexName = randomIdentifier(); final int numShards = randomIntBetween(1, 10); @@ -61,8 +74,8 @@ public void testSnapshotAPMMetrics() throws Exception { final String repositoryName = randomIdentifier(); - // we 
want to ensure some throttling, but not too much that it slows the test down. 5 seemed a reasonable multiple to ensure that. - int shardSizeMultipleToEnsureThrottling = 5; + // we want to ensure some throttling, but not too much that it slows the test down. 3 seemed a reasonable multiple to ensure that. + int shardSizeMultipleToEnsureThrottling = 3; createRepository( repositoryName, "mock", @@ -84,9 +97,10 @@ public void testSnapshotAPMMetrics() throws Exception { waitForBlockOnAnyDataNode(repositoryName); collectMetrics(); + assertSnapshotsInProgressMetricIs(greaterThan(0L)); assertShardsInProgressMetricIs(hasItem(greaterThan(0L))); - assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_STARTED), equalTo(1L)); - assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_COMPLETED), equalTo(0L)); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOTS_STARTED), equalTo(1L)); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOTS_COMPLETED), equalTo(0L)); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_SHARDS_STARTED), greaterThan(0L)); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_SHARDS_COMPLETED), equalTo(0L)); } finally { @@ -103,8 +117,8 @@ public void testSnapshotAPMMetrics() throws Exception { assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_BYTES_UPLOADED), greaterThan(0L)); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_CREATE_THROTTLE_DURATION), greaterThan(0L)); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_RESTORE_THROTTLE_DURATION), equalTo(0L)); - assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_STARTED), equalTo(1L)); - assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_COMPLETED), equalTo(1L)); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOTS_STARTED), equalTo(1L)); + 
assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOTS_COMPLETED), equalTo(1L)); // Sanity check shard duration observations assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_SHARDS_DURATION, hasSize(numShards)); @@ -140,6 +154,7 @@ public void testSnapshotAPMMetrics() throws Exception { assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_SHARDS_STARTED), equalTo((long) numShards)); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_SHARDS_COMPLETED), equalTo((long) numShards)); + assertSnapshotsInProgressMetricIs(equalTo(0L)); assertShardsInProgressMetricIs(everyItem(equalTo(0L))); // Restore the snapshot @@ -173,6 +188,20 @@ private static void assertShardsInProgressMetricIs(Matcher> m assertThat(values, matcher); } + private static void assertSnapshotsInProgressMetricIs(Matcher matcher) { + final List values = internalCluster().getCurrentMasterNodeInstance(PluginsService.class) + .filterPlugins(TestTelemetryPlugin.class) + .map(testTelemetryPlugin -> { + final List longGaugeMeasurement = testTelemetryPlugin.getLongGaugeMeasurement( + SnapshotMetrics.SNAPSHOTS_IN_PROGRESS + ); + return longGaugeMeasurement.getLast().getLong(); + }) + .toList(); + assertThat(values, hasSize(1)); + assertThat(values.getFirst(), matcher); + } + private static void collectMetrics() { allTestTelemetryPlugins().forEach(TestTelemetryPlugin::collect); } diff --git a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java index dd134549ab014..1362033a67d3f 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java +++ b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java @@ -188,6 +188,7 @@ import org.elasticsearch.readiness.ReadinessService; import org.elasticsearch.repositories.RepositoriesModule; import org.elasticsearch.repositories.RepositoriesService; +import org.elasticsearch.repositories.SnapshotMetrics; import 
org.elasticsearch.reservedstate.ReservedClusterStateHandler; import org.elasticsearch.reservedstate.ReservedProjectStateHandler; import org.elasticsearch.reservedstate.ReservedStateHandlerProvider; @@ -728,6 +729,7 @@ private void construct( BigArrays bigArrays = serviceProvider.newBigArrays(pluginsService, pageCacheRecycler, circuitBreakerService); final RecoverySettings recoverySettings = new RecoverySettings(settings, settingsModule.getClusterSettings()); + SnapshotMetrics snapshotMetrics = new SnapshotMetrics(telemetryProvider.getMeterRegistry()); RepositoriesModule repositoriesModule = new RepositoriesModule( environment, pluginsService.filterPlugins(RepositoryPlugin.class).toList(), @@ -737,7 +739,8 @@ private void construct( bigArrays, xContentRegistry, recoverySettings, - telemetryProvider + telemetryProvider, + snapshotMetrics ); RepositoriesService repositoriesService = repositoriesModule.getRepositoryService(); final SetOnce rerouteServiceReference = new SetOnce<>(); @@ -1115,7 +1118,8 @@ public Map queryFields() { repositoriesService, transportService, actionModule.getActionFilters(), - systemIndices + systemIndices, + snapshotMetrics ); SnapshotShardsService snapshotShardsService = new SnapshotShardsService( settings, diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java index a56c7036c2f1c..2c784195d12ef 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java @@ -50,7 +50,8 @@ public RepositoriesModule( BigArrays bigArrays, NamedXContentRegistry namedXContentRegistry, RecoverySettings recoverySettings, - TelemetryProvider telemetryProvider + TelemetryProvider telemetryProvider, + SnapshotMetrics snapshotMetrics ) { final RepositoriesMetrics repositoriesMetrics = new RepositoriesMetrics(telemetryProvider.getMeterRegistry()); Map 
factories = new HashMap<>(); @@ -142,7 +143,7 @@ public Repository create(ProjectId projectId, RepositoryMetadata metadata, Snaps threadPool, client, preRestoreChecks, - telemetryProvider.getMeterRegistry() + snapshotMetrics ); } diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java index 82b4fab1d4d3e..4ec3cf2cdc338 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java @@ -59,7 +59,6 @@ import org.elasticsearch.repositories.blobstore.MeteredBlobStoreRepository; import org.elasticsearch.snapshots.Snapshot; import org.elasticsearch.telemetry.metric.LongWithAttributes; -import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.threadpool.ThreadPool; import java.io.IOException; @@ -136,7 +135,7 @@ public RepositoriesService( ThreadPool threadPool, NodeClient client, List> preRestoreChecks, - MeterRegistry meterRegistry + SnapshotMetrics snapshotMetrics ) { this.typesRegistry = typesRegistry; this.internalTypesRegistry = internalTypesRegistry; @@ -156,7 +155,8 @@ public RepositoriesService( threadPool.relativeTimeInMillisSupplier() ); this.preRestoreChecks = preRestoreChecks; - this.snapshotMetrics = new SnapshotMetrics(meterRegistry, this::getShardSnapshotsInProgress); + this.snapshotMetrics = snapshotMetrics; + snapshotMetrics.createSnapshotShardsInProgressMetric(this::getShardSnapshotsInProgress); } /** @@ -1269,10 +1269,6 @@ public RepositoryUsageStats getUsageStats() { ); } - public SnapshotMetrics getSnapshotMetrics() { - return snapshotMetrics; - } - @Override protected void doStart() {} diff --git a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java index a22c2d72e4a20..9ba3c52aab095 100644 --- 
a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java @@ -9,15 +9,14 @@ package org.elasticsearch.repositories; +import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.telemetry.metric.DoubleHistogram; import org.elasticsearch.telemetry.metric.LongCounter; -import org.elasticsearch.telemetry.metric.LongGauge; import org.elasticsearch.telemetry.metric.LongWithAttributes; import org.elasticsearch.telemetry.metric.MeterRegistry; import java.util.Collection; -import java.util.List; import java.util.Map; import java.util.function.Supplier; @@ -27,20 +26,21 @@ public record SnapshotMetrics( DoubleHistogram snapshotsDurationHistogram, LongCounter snapshotsShardsStartedCounter, LongCounter snapshotsShardsCompletedCounter, - LongGauge snapshotShardsInProgressGauge, DoubleHistogram snapshotShardsDurationHistogram, LongCounter snapshotBlobsUploadedCounter, LongCounter snapshotBytesUploadedCounter, LongCounter snapshotUploadDurationCounter, LongCounter snapshotUploadReadDurationCounter, LongCounter snapshotCreateThrottleDurationCounter, - LongCounter snapshotRestoreThrottleDurationCounter + LongCounter snapshotRestoreThrottleDurationCounter, + MeterRegistry meterRegistry ) { - public static final SnapshotMetrics NOOP = new SnapshotMetrics(MeterRegistry.NOOP, List::of); + public static final SnapshotMetrics NOOP = new SnapshotMetrics(MeterRegistry.NOOP); - public static final String SNAPSHOT_STARTED = "es.repositories.snapshots.started.total"; - public static final String SNAPSHOT_COMPLETED = "es.repositories.snapshots.completed.total"; + public static final String SNAPSHOTS_STARTED = "es.repositories.snapshots.started.total"; + public static final String SNAPSHOTS_COMPLETED = "es.repositories.snapshots.completed.total"; + public static final String SNAPSHOTS_IN_PROGRESS = 
"es.repositories.snapshots.current"; public static final String SNAPSHOT_DURATION = "es.repositories.snapshots.duration.histogram"; public static final String SNAPSHOT_SHARDS_STARTED = "es.repositories.snapshots.shards.started.total"; public static final String SNAPSHOT_SHARDS_COMPLETED = "es.repositories.snapshots.shards.completed.total"; @@ -53,30 +53,38 @@ public record SnapshotMetrics( public static final String SNAPSHOT_CREATE_THROTTLE_DURATION = "es.repositories.snapshots.create_throttling.time.total"; public static final String SNAPSHOT_RESTORE_THROTTLE_DURATION = "es.repositories.snapshots.restore_throttling.time.total"; - public SnapshotMetrics(MeterRegistry meterRegistry, Supplier> shardSnapshotsInProgressObserver) { + public SnapshotMetrics(MeterRegistry meterRegistry) { this( - meterRegistry.registerLongCounter(SNAPSHOT_STARTED, "snapshots started", "unit"), - meterRegistry.registerLongCounter(SNAPSHOT_COMPLETED, "snapshots completed", "unit"), + meterRegistry.registerLongCounter(SNAPSHOTS_STARTED, "snapshots started", "unit"), + meterRegistry.registerLongCounter(SNAPSHOTS_COMPLETED, "snapshots completed", "unit"), meterRegistry.registerDoubleHistogram(SNAPSHOT_DURATION, "snapshots duration", "s"), meterRegistry.registerLongCounter(SNAPSHOT_SHARDS_STARTED, "shard snapshots started", "unit"), meterRegistry.registerLongCounter(SNAPSHOT_SHARDS_COMPLETED, "shard snapshots completed", "unit"), - meterRegistry.registerLongsGauge( - SNAPSHOT_SHARDS_IN_PROGRESS, - "shard snapshots in progress", - "unit", - shardSnapshotsInProgressObserver - ), meterRegistry.registerDoubleHistogram(SNAPSHOT_SHARDS_DURATION, "shard snapshots duration", "s"), meterRegistry.registerLongCounter(SNAPSHOT_BLOBS_UPLOADED, "snapshot blobs uploaded", "unit"), meterRegistry.registerLongCounter(SNAPSHOT_BYTES_UPLOADED, "snapshot bytes uploaded", "bytes"), meterRegistry.registerLongCounter(SNAPSHOT_UPLOAD_DURATION, "snapshot upload duration", "ns"), 
meterRegistry.registerLongCounter(SNAPSHOT_UPLOAD_READ_DURATION, "time spent in read() calls when snapshotting", "ns"), meterRegistry.registerLongCounter(SNAPSHOT_CREATE_THROTTLE_DURATION, "time throttled in snapshot create", "bytes"), - meterRegistry.registerLongCounter(SNAPSHOT_RESTORE_THROTTLE_DURATION, "time throttled in snapshot restore", "bytes") + meterRegistry.registerLongCounter(SNAPSHOT_RESTORE_THROTTLE_DURATION, "time throttled in snapshot restore", "bytes"), + meterRegistry ); } - public static Map createAttributesMap(RepositoryMetadata meta) { - return Map.of("repo_type", meta.type(), "repo_name", meta.name()); + public void createSnapshotShardsInProgressMetric(Supplier> shardSnapshotsInProgressObserver) { + meterRegistry.registerLongsGauge( + SNAPSHOT_SHARDS_IN_PROGRESS, + "shard snapshots in progress", + "unit", + shardSnapshotsInProgressObserver + ); + } + + public void createSnapshotsInProgressMetric(Supplier> snapshotsInProgressObserver) { + meterRegistry.registerLongsGauge(SNAPSHOTS_IN_PROGRESS, "snapshots in progress", "unit", snapshotsInProgressObserver); + } + + public static Map createAttributesMap(ProjectId projectId, RepositoryMetadata meta) { + return Map.of("project_id", projectId.id(), "repo_type", meta.type(), "repo_name", meta.name()); } } diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index dd90d1ecd08dd..d77f96b27bf28 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -528,7 +528,7 @@ protected BlobStoreRepository( threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(), threadPool.executor(ThreadPool.Names.SNAPSHOT) ); - this.blobStoreSnapshotMetrics = new BlobStoreSnapshotMetrics(metadata, snapshotMetrics); + this.blobStoreSnapshotMetrics = new 
BlobStoreSnapshotMetrics(projectId, metadata, snapshotMetrics); } @Override @@ -3215,7 +3215,7 @@ public void snapshotShard(SnapshotShardContext context) { private void doSnapshotShard(SnapshotShardContext context) { blobStoreSnapshotMetrics.shardSnapshotStarted(); - context.addListener(ActionListener.running(() -> blobStoreSnapshotMetrics.shardSnapshotCompleted(context.status().getTotalTime()))); + context.addListener(ActionListener.running(() -> blobStoreSnapshotMetrics.shardSnapshotCompleted(context.status()))); if (isReadOnly()) { context.onFailure(new RepositoryException(metadata.name(), "cannot snapshot shard on a readonly repository")); return; diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java index f43dcf2810466..cfad743d88805 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java @@ -9,8 +9,10 @@ package org.elasticsearch.repositories.blobstore; +import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.common.metrics.CounterMetric; +import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus; import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.telemetry.metric.LongWithAttributes; @@ -30,9 +32,9 @@ public class BlobStoreSnapshotMetrics { private final CounterMetric numberOfShardSnapshotsCompleted = new CounterMetric(); private final Map metricAttributes; - public BlobStoreSnapshotMetrics(RepositoryMetadata repositoryMetadata, SnapshotMetrics snapshotMetrics) { + public BlobStoreSnapshotMetrics(ProjectId projectId, RepositoryMetadata repositoryMetadata, SnapshotMetrics snapshotMetrics) { this.snapshotMetrics = snapshotMetrics; - metricAttributes = 
SnapshotMetrics.createAttributesMap(repositoryMetadata); + metricAttributes = SnapshotMetrics.createAttributesMap(projectId, repositoryMetadata); } public void incrementSnapshotRateLimitingTimeInNanos(long throttleTimeNanos) { @@ -71,10 +73,10 @@ public void shardSnapshotStarted() { shardSnapshotsInProgress.inc(); } - public void shardSnapshotCompleted(long durationInMillis) { + public void shardSnapshotCompleted(IndexShardSnapshotStatus status) { snapshotMetrics.snapshotsShardsCompletedCounter().increment(); - if (durationInMillis > 0) { - snapshotMetrics.snapshotShardsDurationHistogram().record(durationInMillis / 1_000f); + if (status.getStage() == IndexShardSnapshotStatus.Stage.DONE) { + snapshotMetrics.snapshotShardsDurationHistogram().record(status.getTotalTime() / 1_000f); } numberOfShardSnapshotsCompleted.inc(); shardSnapshotsInProgress.dec(); diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 70291fcd050b3..bced678a731a1 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -103,6 +103,7 @@ import org.elasticsearch.repositories.ShardSnapshotResult; import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.tasks.Task; +import org.elasticsearch.telemetry.metric.LongWithAttributes; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; @@ -198,6 +199,8 @@ public final class SnapshotsService extends AbstractLifecycleComponent implement private final SystemIndices systemIndices; + private final SnapshotMetrics snapshotMetrics; + private final MasterServiceTaskQueue masterServiceTaskQueue; private final ShardSnapshotUpdateCompletionHandler shardSnapshotUpdateCompletionHandler; @@ -225,7 +228,8 @@ public SnapshotsService( RepositoriesService repositoriesService, TransportService 
transportService, ActionFilters actionFilters, - SystemIndices systemIndices + SystemIndices systemIndices, + SnapshotMetrics snapshotMetrics ) { this.clusterService = clusterService; this.rerouteService = rerouteService; @@ -233,6 +237,8 @@ public SnapshotsService( this.repositoriesService = repositoriesService; this.threadPool = transportService.getThreadPool(); this.transportService = transportService; + this.snapshotMetrics = snapshotMetrics; + snapshotMetrics.createSnapshotsInProgressMetric(this::getSnapshotsInProgress); // The constructor of UpdateSnapshotStatusAction will register itself to the TransportService. this.updateSnapshotStatusHandler = new UpdateSnapshotStatusAction(transportService, clusterService, threadPool, actionFilters); @@ -1597,8 +1603,10 @@ protected void doRun() { @Override public void onResponse(List> actionListeners) { completeListenersIgnoringException(actionListeners, snapshotInfo); - final Map attributes = SnapshotMetrics.createAttributesMap(repo.getMetadata()); - final SnapshotMetrics snapshotMetrics = repositoriesService.getSnapshotMetrics(); + final Map attributes = SnapshotMetrics.createAttributesMap( + snapshot.getProjectId(), + repo.getMetadata() + ); snapshotMetrics.snapshotsCompletedCounter().incrementBy(1, attributes); snapshotMetrics.snapshotsDurationHistogram() .record((snapshotInfo.endTime() - snapshotInfo.startTime()) / 1_000.0, attributes); @@ -4329,8 +4337,11 @@ private SnapshotsInProgress createSnapshot( final var res = snapshotsInProgress.withAddedEntry(newEntry); taskContext.success(() -> { logger.info("snapshot [{}] started", snapshot); - final Map attributes = SnapshotMetrics.createAttributesMap(repository.getMetadata()); - repositoriesService.getSnapshotMetrics().snapshotsStartedCounter().incrementBy(1, attributes); + final Map attributes = SnapshotMetrics.createAttributesMap( + snapshot.getProjectId(), + repository.getMetadata() + ); + snapshotMetrics.snapshotsStartedCounter().incrementBy(1, attributes); 
createSnapshotTask.listener.onResponse(snapshot); if (newEntry.state().completed()) { endSnapshot(newEntry, currentState.metadata(), createSnapshotTask.repositoryData); @@ -4340,6 +4351,24 @@ private SnapshotsInProgress createSnapshot( } } + private Collection getSnapshotsInProgress() { + final SnapshotsInProgress snapshotsInProgress = SnapshotsInProgress.get(clusterService.state()); + final List snapshotsInProgressMetrics = new ArrayList<>(); + clusterService.state().metadata().projects().forEach((projectId, project) -> { + RepositoriesMetadata repositoriesMetadata = RepositoriesMetadata.get(project); + if (repositoriesMetadata != null) { + repositoriesMetadata.repositories().forEach(repository -> { + int snapshotCount = snapshotsInProgress.forRepo(projectId, repository.name()).size(); + logger.info("Returning snapshot count of {}", snapshotCount); + snapshotsInProgressMetrics.add( + new LongWithAttributes(snapshotCount, SnapshotMetrics.createAttributesMap(projectId, repository)) + ); + }); + } + }); + return snapshotsInProgressMetrics; + } + private record UpdateNodeIdsForRemovalTask() implements ClusterStateTaskListener { @Override public void onFailure(Exception e) { diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java index 4a5830864321f..b1728aeddadaf 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/repositories/reservedstate/ReservedRepositoryActionTests.java @@ -22,8 +22,8 @@ import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryException; import org.elasticsearch.repositories.RepositoryMissingException; +import org.elasticsearch.repositories.SnapshotMetrics; 
import org.elasticsearch.reservedstate.TransformState; -import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xcontent.XContentParser; @@ -153,7 +153,7 @@ public Repository create(ProjectId projectId, RepositoryMetadata metadata) { threadPool, mock(NodeClient.class), null, - MeterRegistry.NOOP + SnapshotMetrics.NOOP ) ); diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusActionTests.java index d4acb98bfdd8b..92c5533d834c6 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusActionTests.java @@ -26,12 +26,12 @@ import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.repositories.ShardGeneration; import org.elasticsearch.repositories.ShardSnapshotResult; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.snapshots.Snapshot; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.tasks.CancellableTask; import org.elasticsearch.tasks.TaskCancelHelper; import org.elasticsearch.tasks.TaskCancelledException; -import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.transport.CapturingTransport; @@ -76,7 +76,7 @@ public void initializeComponents() throws Exception { threadPool, nodeClient, List.of(), - MeterRegistry.NOOP + SnapshotMetrics.NOOP ); action = new TransportSnapshotsStatusAction( transportService, diff --git 
a/server/src/test/java/org/elasticsearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java b/server/src/test/java/org/elasticsearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java index 851fe469ca6a5..bc60cb9699a53 100644 --- a/server/src/test/java/org/elasticsearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java +++ b/server/src/test/java/org/elasticsearch/indices/cluster/IndicesClusterStateServiceRandomUpdatesTests.java @@ -47,7 +47,7 @@ import org.elasticsearch.indices.recovery.PeerRecoveryTargetService; import org.elasticsearch.indices.recovery.SnapshotFilesProvider; import org.elasticsearch.repositories.RepositoriesService; -import org.elasticsearch.telemetry.metric.MeterRegistry; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.Transport; @@ -551,7 +551,7 @@ private IndicesClusterStateService createIndicesClusterStateService( threadPool, client, List.of(), - MeterRegistry.NOOP + SnapshotMetrics.NOOP ); final PeerRecoveryTargetService recoveryTargetService = new PeerRecoveryTargetService( client, diff --git a/server/src/test/java/org/elasticsearch/repositories/RepositoriesModuleTests.java b/server/src/test/java/org/elasticsearch/repositories/RepositoriesModuleTests.java index 7f6885e7a977f..e3b59a6e3d941 100644 --- a/server/src/test/java/org/elasticsearch/repositories/RepositoriesModuleTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/RepositoriesModuleTests.java @@ -96,7 +96,8 @@ public void testCanRegisterTwoRepositoriesWithDifferentTypes() { MockBigArrays.NON_RECYCLING_INSTANCE, contentRegistry, recoverySettings, - TelemetryProvider.NOOP + TelemetryProvider.NOOP, + SnapshotMetrics.NOOP ); } @@ -133,7 +134,8 @@ public void testCannotRegisterTwoRepositoriesWithSameTypes() { MockBigArrays.NON_RECYCLING_INSTANCE, contentRegistry, recoverySettings, - 
TelemetryProvider.NOOP + TelemetryProvider.NOOP, + SnapshotMetrics.NOOP ) ); @@ -159,7 +161,8 @@ public void testCannotRegisterTwoInternalRepositoriesWithSameTypes() { MockBigArrays.NON_RECYCLING_INSTANCE, contentRegistry, recoverySettings, - TelemetryProvider.NOOP + TelemetryProvider.NOOP, + SnapshotMetrics.NOOP ) ); @@ -192,7 +195,8 @@ public void testCannotRegisterNormalAndInternalRepositoriesWithSameTypes() { MockBigArrays.NON_RECYCLING_INSTANCE, contentRegistry, recoverySettings, - TelemetryProvider.NOOP + TelemetryProvider.NOOP, + SnapshotMetrics.NOOP ) ); diff --git a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java index fd6c473c2ff27..7ecf4847e078a 100644 --- a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java @@ -53,7 +53,6 @@ import org.elasticsearch.repositories.blobstore.MeteredBlobStoreRepository; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotInfo; -import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; @@ -157,7 +156,7 @@ public void setUp() throws Exception { threadPool, client, List.of(), - MeterRegistry.NOOP + SnapshotMetrics.NOOP ); clusterService.start(); diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java index c2b60d248a468..5fb9df743b4b0 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java @@ -174,6 +174,7 @@ import org.elasticsearch.repositories.RepositoriesService; import 
org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryData; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.VerifyNodeRepositoryAction; import org.elasticsearch.repositories.VerifyNodeRepositoryCoordinationAction; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; @@ -187,7 +188,6 @@ import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.fetch.FetchPhase; import org.elasticsearch.telemetry.TelemetryProvider; -import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.telemetry.tracing.Tracer; import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.ESTestCase; @@ -2412,7 +2412,7 @@ public RecyclerBytesStreamOutput newNetworkBytesStream() { threadPool, client, List.of(), - MeterRegistry.NOOP + SnapshotMetrics.NOOP ); final ActionFilters actionFilters = new ActionFilters(emptySet()); snapshotsService = new SnapshotsService( @@ -2423,7 +2423,8 @@ public RecyclerBytesStreamOutput newNetworkBytesStream() { repositoriesService, transportService, actionFilters, - EmptySystemIndices.INSTANCE + EmptySystemIndices.INSTANCE, + SnapshotMetrics.NOOP ); nodeEnv = new NodeEnvironment(settings, environment); final NamedXContentRegistry namedXContentRegistry = new NamedXContentRegistry(Collections.emptyList()); From 3894be6fe63e1239d5d5db6ae7530f119c30c7d5 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 3 Jul 2025 16:11:26 +1000 Subject: [PATCH 18/65] Tidy --- .../org/elasticsearch/repositories/SnapshotMetricsIT.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index 466ab410fadf6..277a94df8c419 100644 --- 
a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -74,8 +74,9 @@ public void testSnapshotAPMMetrics() throws Exception { final String repositoryName = randomIdentifier(); - // we want to ensure some throttling, but not too much that it slows the test down. 3 seemed a reasonable multiple to ensure that. - int shardSizeMultipleToEnsureThrottling = 3; + // we want to ensure some throttling, but not so much that it makes the test excessively slow. + // 3 seemed a reasonable multiple to ensure that. + final int shardSizeMultipleToEnsureThrottling = 3; createRepository( repositoryName, "mock", From d164c8cc28652bc6aae7d68248d21412e1546e28 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 3 Jul 2025 16:13:18 +1000 Subject: [PATCH 19/65] Tidy --- .../java/org/elasticsearch/repositories/SnapshotMetricsIT.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index 277a94df8c419..3821ed2716f3f 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -118,6 +118,7 @@ public void testSnapshotAPMMetrics() throws Exception { assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_BYTES_UPLOADED), greaterThan(0L)); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_CREATE_THROTTLE_DURATION), greaterThan(0L)); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_RESTORE_THROTTLE_DURATION), equalTo(0L)); + assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOTS_STARTED), equalTo(1L)); 
assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOTS_COMPLETED), equalTo(1L)); From b8cc9f93ebfdc6da8c07bd59571f1963ce7b97d7 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 3 Jul 2025 16:14:20 +1000 Subject: [PATCH 20/65] Tidy --- .../org/elasticsearch/repositories/SnapshotMetricsIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index 3821ed2716f3f..f0d7f2a8bebad 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -138,8 +138,8 @@ public void testSnapshotAPMMetrics() throws Exception { // Work out the maximum amount of concurrency per node final ThreadPool tp = internalCluster().getDataNodeInstance(ThreadPool.class); - int snapshotThreadPoolSize = tp.info(ThreadPool.Names.SNAPSHOT).getMax(); - int maximumPerNodeConcurrency = Math.max(snapshotThreadPoolSize, numShards); + final int snapshotThreadPoolSize = tp.info(ThreadPool.Names.SNAPSHOT).getMax(); + final int maximumPerNodeConcurrency = Math.max(snapshotThreadPoolSize, numShards); // sanity check duration values final long upperBoundTimeSpentOnSnapshotThingsNanos = internalCluster().numDataNodes() * maximumPerNodeConcurrency From 7f7427c7c579028d2d774e8a233bdf6b4665e90b Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 3 Jul 2025 16:41:33 +1000 Subject: [PATCH 21/65] Reduce surface area of change (?) 
--- .../azure/AzureRepositoryPlugin.java | 37 +++++----- ...eCloudStorageBlobStoreRepositoryTests.java | 71 +++++++++---------- .../gcs/GoogleCloudStoragePlugin.java | 36 +++++----- .../repositories/s3/S3RepositoryPlugin.java | 32 ++++----- .../repository/url/URLRepositoryPlugin.java | 4 +- .../repositories/hdfs/HdfsPlugin.java | 4 +- .../plan/ShardSnapshotsServiceIT.java | 4 +- .../repositories/InvalidRepositoryIT.java | 3 +- ...BlobStoreRepositoryOperationPurposeIT.java | 4 +- ...etadataLoadingDuringSnapshotRestoreIT.java | 3 +- .../SnapshotsServiceDoubleFinalizationIT.java | 4 +- .../plugins/RepositoryPlugin.java | 4 +- .../repositories/RepositoriesModule.java | 33 ++++----- .../repositories/RepositoriesService.java | 25 ++----- .../repositories/Repository.java | 29 +------- .../repositories/RepositoriesModuleTests.java | 15 ++-- ...bStoreRepositoryDeleteThrottlingTests.java | 4 +- ...ncySimulatingBlobStoreRepositoryTests.java | 4 +- .../LatencySimulatingRepositoryPlugin.java | 4 +- .../snapshots/mockstore/MockRepository.java | 32 ++++----- .../sourceonly/SourceOnlySnapshotIT.java | 4 +- .../SourceOnlySnapshotRepository.java | 16 +---- .../elasticsearch/xpack/core/XPackPlugin.java | 4 +- .../core/LocalStateCompositeXPackPlugin.java | 24 ++++++- .../lucene/bwc/AbstractArchiveTestCase.java | 4 +- ...chableSnapshotDiskThresholdIntegTests.java | 3 +- ...archableSnapshotsPrewarmingIntegTests.java | 4 +- ...SnapshotRecoveryStateIntegrationTests.java | 4 +- .../xpack/slm/SLMHealthBlockedSnapshotIT.java | 4 +- .../xpack/slm/SLMStatDisruptionIT.java | 7 +- .../SnapshotBasedIndexRecoveryIT.java | 4 +- .../analyze/RepositoryAnalysisFailureIT.java | 3 +- .../analyze/RepositoryAnalysisSuccessIT.java | 3 +- .../votingonly/VotingOnlyNodePluginTests.java | 4 +- 34 files changed, 219 insertions(+), 221 deletions(-) diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepositoryPlugin.java 
b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepositoryPlugin.java index ed86303865fab..8a3194b23d907 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepositoryPlugin.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepositoryPlugin.java @@ -10,8 +10,6 @@ package org.elasticsearch.repositories.azure; import org.apache.lucene.util.SetOnce; -import org.elasticsearch.cluster.metadata.ProjectId; -import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Setting; @@ -63,26 +61,23 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { - return Collections.singletonMap(AzureRepository.TYPE, new Repository.SnapshotMetricsFactory() { - - @Override - public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { - AzureStorageService storageService = azureStoreService.get(); - assert storageService != null; - return new AzureRepository( - projectId, - metadata, - namedXContentRegistry, - storageService, - clusterService, - bigArrays, - recoverySettings, - repositoriesMetrics, - snapshotMetrics - ); - } + return Collections.singletonMap(AzureRepository.TYPE, (projectId, metadata) -> { + AzureStorageService storageService = azureStoreService.get(); + assert storageService != null; + return new AzureRepository( + projectId, + metadata, + namedXContentRegistry, + storageService, + clusterService, + bigArrays, + recoverySettings, + repositoriesMetrics, + snapshotMetrics + ); }); } diff --git 
a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java index efdf0b84e5525..3591a608ff71e 100644 --- a/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java +++ b/modules/repository-gcs/src/internalClusterTest/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobStoreRepositoryTests.java @@ -22,7 +22,6 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; -import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.BackoffPolicy; @@ -274,44 +273,42 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { - return Collections.singletonMap(GoogleCloudStorageRepository.TYPE, new Repository.SnapshotMetricsFactory() { - - @Override - public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { - return new GoogleCloudStorageRepository( - projectId, - metadata, - registry, - TestGoogleCloudStoragePlugin.this.storageService, - clusterService, - bigArrays, - recoverySettings, - new GcsRepositoryStatsCollector(), - snapshotMetrics - ) { - @Override - protected GoogleCloudStorageBlobStore createBlobStore() { - return new GoogleCloudStorageBlobStore( - metadata.settings().get("bucket"), - "test", - metadata.name(), - storageService, - bigArrays, - randomIntBetween(1, 8) * 1024, - BackoffPolicy.noBackoff(), - this.statsCollector() - ) { - @Override - long 
getLargeBlobThresholdInBytes() { - return ByteSizeUnit.MB.toBytes(1); - } - }; - } - }; + return Collections.singletonMap( + GoogleCloudStorageRepository.TYPE, + (projectId, metadata) -> new GoogleCloudStorageRepository( + projectId, + metadata, + registry, + this.storageService, + clusterService, + bigArrays, + recoverySettings, + new GcsRepositoryStatsCollector(), + snapshotMetrics + ) { + @Override + protected GoogleCloudStorageBlobStore createBlobStore() { + return new GoogleCloudStorageBlobStore( + metadata.settings().get("bucket"), + "test", + metadata.name(), + storageService, + bigArrays, + randomIntBetween(1, 8) * 1024, + BackoffPolicy.noBackoff(), + this.statsCollector() + ) { + @Override + long getLargeBlobThresholdInBytes() { + return ByteSizeUnit.MB.toBytes(1); + } + }; + } } - }); + ); } } diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java index 15ab1fa0d1f53..89548513c4ff1 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStoragePlugin.java @@ -9,8 +9,6 @@ package org.elasticsearch.repositories.gcs; -import org.elasticsearch.cluster.metadata.ProjectId; -import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Setting; @@ -56,25 +54,23 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { - return Collections.singletonMap(GoogleCloudStorageRepository.TYPE, new Repository.SnapshotMetricsFactory() { - 
- @Override - public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { - return new GoogleCloudStorageRepository( - projectId, - metadata, - namedXContentRegistry, - GoogleCloudStoragePlugin.this.storageService, - clusterService, - bigArrays, - recoverySettings, - new GcsRepositoryStatsCollector(clusterService.threadPool(), metadata, repositoriesMetrics), - snapshotMetrics - ); - } - }); + return Collections.singletonMap( + GoogleCloudStorageRepository.TYPE, + (projectId, metadata) -> new GoogleCloudStorageRepository( + projectId, + metadata, + namedXContentRegistry, + this.storageService, + clusterService, + bigArrays, + recoverySettings, + new GcsRepositoryStatsCollector(clusterService.threadPool(), metadata, repositoriesMetrics), + snapshotMetrics + ) + ); } @Override diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RepositoryPlugin.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RepositoryPlugin.java index 131175128af44..da81543515c4e 100644 --- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RepositoryPlugin.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3RepositoryPlugin.java @@ -116,25 +116,23 @@ public Map getRepositories( final ClusterService clusterService, final BigArrays bigArrays, final RecoverySettings recoverySettings, - final RepositoriesMetrics repositoriesMetrics + final RepositoriesMetrics repositoriesMetrics, + final SnapshotMetrics snapshotMetrics ) { final S3RepositoriesMetrics s3RepositoriesMetrics = new S3RepositoriesMetrics(repositoriesMetrics); - return Collections.singletonMap(S3Repository.TYPE, new Repository.SnapshotMetricsFactory() { - - @Override - public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { - return createRepository( - projectId, - metadata, - registry, - clusterService, - bigArrays, - 
recoverySettings, - s3RepositoriesMetrics, - snapshotMetrics - ); - } - }); + return Collections.singletonMap( + S3Repository.TYPE, + (projectId, metadata) -> createRepository( + projectId, + metadata, + registry, + clusterService, + bigArrays, + recoverySettings, + s3RepositoriesMetrics, + snapshotMetrics + ) + ); } @Override diff --git a/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java b/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java index 821b761aa663d..ee01a25fcf331 100644 --- a/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java +++ b/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java @@ -20,6 +20,7 @@ import org.elasticsearch.plugins.RepositoryPlugin; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.url.URLRepository; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -49,7 +50,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Collections.singletonMap( URLRepository.TYPE, diff --git a/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java b/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java index 9d80a270c7242..a5d1385da8a16 100644 --- a/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java +++ b/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java @@ -19,6 +19,7 @@ import org.elasticsearch.plugins.RepositoryPlugin; import 
org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.xcontent.NamedXContentRegistry; import java.util.Collections; @@ -67,7 +68,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Collections.singletonMap( "hdfs", diff --git a/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/plan/ShardSnapshotsServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/plan/ShardSnapshotsServiceIT.java index 06e4d06fcee0e..758b67b4af486 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/plan/ShardSnapshotsServiceIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/plan/ShardSnapshotsServiceIT.java @@ -35,6 +35,7 @@ import org.elasticsearch.repositories.RepositoryData; import org.elasticsearch.repositories.ShardGeneration; import org.elasticsearch.repositories.ShardSnapshotInfo; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.repositories.fs.FsRepository; import org.elasticsearch.snapshots.SnapshotException; @@ -76,7 +77,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Collections.singletonMap( TYPE, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java index cc0dc44e10d9f..720eb772b131c 100644 --- 
a/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java @@ -83,7 +83,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Collections.singletonMap( TYPE, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryOperationPurposeIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryOperationPurposeIT.java index fb119caa25cb6..51ec2d12b90c5 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryOperationPurposeIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryOperationPurposeIT.java @@ -29,6 +29,7 @@ import org.elasticsearch.plugins.RepositoryPlugin; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.fs.FsRepository; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -97,7 +98,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Map.of( ASSERTING_REPO_TYPE, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java index 
73f5c1c0b8a62..f58382233550c 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java @@ -224,7 +224,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Collections.singletonMap( TYPE, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotsServiceDoubleFinalizationIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotsServiceDoubleFinalizationIT.java index 7c00a93fa495b..9cf8163a4661a 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotsServiceDoubleFinalizationIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotsServiceDoubleFinalizationIT.java @@ -35,6 +35,7 @@ import org.elasticsearch.plugins.RepositoryPlugin; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.fs.FsRepository; import org.elasticsearch.snapshots.mockstore.BlobStoreWrapper; import org.elasticsearch.test.ESIntegTestCase; @@ -209,7 +210,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Map.of( REPO_TYPE, diff --git a/server/src/main/java/org/elasticsearch/plugins/RepositoryPlugin.java b/server/src/main/java/org/elasticsearch/plugins/RepositoryPlugin.java index aa1587118f166..97ce00cc4c375 100644 --- 
a/server/src/main/java/org/elasticsearch/plugins/RepositoryPlugin.java +++ b/server/src/main/java/org/elasticsearch/plugins/RepositoryPlugin.java @@ -16,6 +16,7 @@ import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.snapshots.Snapshot; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -42,7 +43,8 @@ default Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Collections.emptyMap(); } diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java index 2c784195d12ef..610b02eb7c0a1 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesModule.java @@ -10,8 +10,6 @@ package org.elasticsearch.repositories; import org.elasticsearch.client.internal.node.NodeClient; -import org.elasticsearch.cluster.metadata.ProjectId; -import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.BigArrays; @@ -55,22 +53,18 @@ public RepositoriesModule( ) { final RepositoriesMetrics repositoriesMetrics = new RepositoriesMetrics(telemetryProvider.getMeterRegistry()); Map factories = new HashMap<>(); - factories.put(FsRepository.TYPE, new Repository.SnapshotMetricsFactory() { - - @Override - public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { - return new FsRepository( - projectId, - metadata, - env, 
- namedXContentRegistry, - clusterService, - bigArrays, - recoverySettings, - snapshotMetrics - ); - } - }); + factories.put( + FsRepository.TYPE, + (projectId, metadata) -> new FsRepository( + projectId, + metadata, + env, + namedXContentRegistry, + clusterService, + bigArrays, + recoverySettings + ) + ); for (RepositoryPlugin repoPlugin : repoPlugins) { Map newRepoTypes = repoPlugin.getRepositories( @@ -79,7 +73,8 @@ public Repository create(ProjectId projectId, RepositoryMetadata metadata, Snaps clusterService, bigArrays, recoverySettings, - repositoriesMetrics + repositoriesMetrics, + snapshotMetrics ); for (Map.Entry entry : newRepoTypes.entrySet()) { if (factories.put(entry.getKey(), entry.getValue()) != null) { diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java index 4ec3cf2cdc338..aba8d9f03d290 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java @@ -755,8 +755,7 @@ private void applyProjectStateForAddedOrExistingProject(long version, ProjectSta projectId, repositoryMetadata, typesRegistry, - RepositoriesService::createUnknownTypeRepository, - snapshotMetrics + RepositoriesService::createUnknownTypeRepository ); } catch (RepositoryException ex) { // TODO: this catch is bogus, it means the old repo is already closed, @@ -771,8 +770,7 @@ private void applyProjectStateForAddedOrExistingProject(long version, ProjectSta projectId, repositoryMetadata, typesRegistry, - RepositoriesService::createUnknownTypeRepository, - snapshotMetrics + RepositoriesService::createUnknownTypeRepository ); } catch (RepositoryException ex) { logger.warn(() -> "failed to create repository " + projectRepoString(projectId, repositoryMetadata.name()), ex); @@ -948,8 +946,7 @@ public void registerInternalRepository(ProjectId projectId, String name, 
String projectId, metadata, internalTypesRegistry, - RepositoriesService::throwRepositoryTypeDoesNotExists, - snapshotMetrics + RepositoriesService::throwRepositoryTypeDoesNotExists ); final var newRepos = new HashMap<>(existingRepos); newRepos.put(name, repo); @@ -1029,8 +1026,7 @@ private static Repository createRepository( @Nullable ProjectId projectId, RepositoryMetadata repositoryMetadata, Map factories, - BiFunction defaultFactory, - SnapshotMetrics snapshotMetrics + BiFunction defaultFactory ) { logger.debug("creating repository [{}][{}]", repositoryMetadata.type(), repositoryMetadata.name()); Repository.Factory factory = factories.get(repositoryMetadata.type()); @@ -1039,7 +1035,7 @@ private static Repository createRepository( } Repository repository = null; try { - repository = factory.create(projectId, repositoryMetadata, factories::get, snapshotMetrics); + repository = factory.create(projectId, repositoryMetadata, factories::get); repository.start(); return repository; } catch (Exception e) { @@ -1070,8 +1066,7 @@ public Repository createRepository(ProjectId projectId, RepositoryMetadata repos Objects.requireNonNull(projectId), repositoryMetadata, typesRegistry, - RepositoriesService::throwRepositoryTypeDoesNotExists, - snapshotMetrics + RepositoriesService::throwRepositoryTypeDoesNotExists ); } @@ -1082,13 +1077,7 @@ public Repository createRepository(ProjectId projectId, RepositoryMetadata repos public Repository createNonProjectRepository(RepositoryMetadata repositoryMetadata) { assert DiscoveryNode.isStateless(clusterService.getSettings()) : "outside stateless only project level repositories are allowed: " + repositoryMetadata; - return createRepository( - null, - repositoryMetadata, - typesRegistry, - RepositoriesService::throwRepositoryTypeDoesNotExists, - snapshotMetrics - ); + return createRepository(null, repositoryMetadata, typesRegistry, RepositoriesService::throwRepositoryTypeDoesNotExists); } private Collection getShardSnapshotsInProgress() 
{ diff --git a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index 98b02053c7554..37bc299e3d9e2 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -65,43 +65,18 @@ interface Factory { */ Repository create(@Nullable ProjectId projectId, RepositoryMetadata metadata) throws Exception; - /** - * Constructs a repository. - * - * @param projectId the project-id for the repository or {@code null} if the repository is at the cluster level. - * @param metadata metadata for the repository including name and settings - * @param snapshotMetrics the singleton SnapshotMetrics instance - */ - default Repository create(@Nullable ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) - throws Exception { - return create(projectId, metadata); - } - /** * Constructs a repository. * @param projectId the project-id for the repository or {@code null} if the repository is at the cluster level. * @param metadata metadata for the repository including name and settings * @param typeLookup a function that returns the repository factory for the given repository type. 
- * @param snapshotMetrics the singleton SnapshotMetrics instance */ default Repository create( @Nullable ProjectId projectId, RepositoryMetadata metadata, - Function typeLookup, - SnapshotMetrics snapshotMetrics + Function typeLookup ) throws Exception { - return create(projectId, metadata, snapshotMetrics); - } - } - - /** - * A convenience class for {@link Factory} instances that require a {@link SnapshotMetrics} instance - */ - class SnapshotMetricsFactory implements Factory { - - @Override - public final Repository create(ProjectId projectId, RepositoryMetadata metadata) throws Exception { - throw new UnsupportedOperationException("This repository requires a SnapshotMetrics implementation"); + return create(projectId, metadata); } } diff --git a/server/src/test/java/org/elasticsearch/repositories/RepositoriesModuleTests.java b/server/src/test/java/org/elasticsearch/repositories/RepositoriesModuleTests.java index e3b59a6e3d941..9fb575f5a6b79 100644 --- a/server/src/test/java/org/elasticsearch/repositories/RepositoriesModuleTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/RepositoriesModuleTests.java @@ -72,7 +72,8 @@ public void testCanRegisterTwoRepositoriesWithDifferentTypes() { eq(clusterService), eq(MockBigArrays.NON_RECYCLING_INSTANCE), eq(recoverySettings), - any(RepositoriesMetrics.class) + any(RepositoriesMetrics.class), + any(SnapshotMetrics.class) ) ).thenReturn(Collections.singletonMap("type1", factory)); when( @@ -82,7 +83,8 @@ public void testCanRegisterTwoRepositoriesWithDifferentTypes() { eq(clusterService), eq(MockBigArrays.NON_RECYCLING_INSTANCE), eq(recoverySettings), - any(RepositoriesMetrics.class) + any(RepositoriesMetrics.class), + any(SnapshotMetrics.class) ) ).thenReturn(Collections.singletonMap("type2", factory)); @@ -109,7 +111,8 @@ public void testCannotRegisterTwoRepositoriesWithSameTypes() { eq(clusterService), eq(MockBigArrays.NON_RECYCLING_INSTANCE), eq(recoverySettings), - any(RepositoriesMetrics.class) + 
any(RepositoriesMetrics.class), + any(SnapshotMetrics.class) ) ).thenReturn(Collections.singletonMap("type1", factory)); when( @@ -119,7 +122,8 @@ public void testCannotRegisterTwoRepositoriesWithSameTypes() { eq(clusterService), eq(MockBigArrays.NON_RECYCLING_INSTANCE), eq(recoverySettings), - any(RepositoriesMetrics.class) + any(RepositoriesMetrics.class), + any(SnapshotMetrics.class) ) ).thenReturn(Collections.singletonMap("type1", factory)); @@ -177,7 +181,8 @@ public void testCannotRegisterNormalAndInternalRepositoriesWithSameTypes() { eq(clusterService), eq(MockBigArrays.NON_RECYCLING_INSTANCE), eq(recoverySettings), - any(RepositoriesMetrics.class) + any(RepositoriesMetrics.class), + any(SnapshotMetrics.class) ) ).thenReturn(Collections.singletonMap("type1", factory)); when(plugin2.getInternalRepositories(environment, contentRegistry, clusterService, recoverySettings)).thenReturn( diff --git a/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryDeleteThrottlingTests.java b/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryDeleteThrottlingTests.java index de1979cfcf5d2..0d8011830dcec 100644 --- a/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryDeleteThrottlingTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/blobstore/BlobStoreRepositoryDeleteThrottlingTests.java @@ -25,6 +25,7 @@ import org.elasticsearch.plugins.RepositoryPlugin; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.fs.FsRepository; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESSingleNodeTestCase; @@ -71,7 +72,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics 
repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Collections.singletonMap( TEST_REPO_TYPE, diff --git a/test/external-modules/latency-simulating-directory/src/internalClusterTest/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepositoryTests.java b/test/external-modules/latency-simulating-directory/src/internalClusterTest/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepositoryTests.java index c553f0f10062a..f7cfb79be4893 100644 --- a/test/external-modules/latency-simulating-directory/src/internalClusterTest/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepositoryTests.java +++ b/test/external-modules/latency-simulating-directory/src/internalClusterTest/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepositoryTests.java @@ -23,6 +23,7 @@ import org.elasticsearch.plugins.RepositoryPlugin; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; import org.elasticsearch.snapshots.SnapshotInfo; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -61,7 +62,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Map.of( REPO_TYPE, diff --git a/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingRepositoryPlugin.java b/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingRepositoryPlugin.java index b60c965a66537..e4e5626017945 100644 --- 
a/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingRepositoryPlugin.java +++ b/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingRepositoryPlugin.java @@ -18,6 +18,7 @@ import org.elasticsearch.plugins.RepositoryPlugin; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.xcontent.NamedXContentRegistry; import java.util.Map; @@ -37,7 +38,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Map.of( TYPE, diff --git a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java index 7a42de93ff1de..e0d84de74dc53 100644 --- a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java +++ b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java @@ -87,24 +87,22 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { - return Collections.singletonMap("mock", new SnapshotMetricsFactory() { - - @Override - public Repository create(ProjectId projectId, RepositoryMetadata metadata, SnapshotMetrics snapshotMetrics) { - return new MockRepository( - projectId, - metadata, - env, - namedXContentRegistry, - clusterService, - bigArrays, - recoverySettings, - snapshotMetrics - ); - } - }); + return Collections.singletonMap( + "mock", + (projectId, 
metadata) -> new MockRepository( + projectId, + metadata, + env, + namedXContentRegistry, + clusterService, + bigArrays, + recoverySettings, + snapshotMetrics + ) + ); } @Override diff --git a/x-pack/plugin/core/src/internalClusterTest/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotIT.java b/x-pack/plugin/core/src/internalClusterTest/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotIT.java index 6074b81d68608..5de93cc173faa 100644 --- a/x-pack/plugin/core/src/internalClusterTest/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotIT.java +++ b/x-pack/plugin/core/src/internalClusterTest/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotIT.java @@ -34,6 +34,7 @@ import org.elasticsearch.plugins.RepositoryPlugin; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.slice.SliceBuilder; @@ -89,7 +90,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Collections.singletonMap("source", SourceOnlySnapshotRepository.newRepositoryFactory()); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotRepository.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotRepository.java index 1ae1e5e0fe89b..e6f3c3fa54277 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotRepository.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/snapshots/sourceonly/SourceOnlySnapshotRepository.java @@ -43,7 +43,6 @@ import org.elasticsearch.repositories.IndexId; import 
org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.SnapshotIndexCommit; -import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.SnapshotShardContext; import java.io.Closeable; @@ -263,24 +262,15 @@ public Repository create(ProjectId projectId, RepositoryMetadata metadata) { } @Override - public Repository create( - ProjectId projectId, - RepositoryMetadata metadata, - Function typeLookup, - SnapshotMetrics snapshotMetrics - ) throws Exception { + public Repository create(ProjectId projectId, RepositoryMetadata metadata, Function typeLookup) + throws Exception { String delegateType = DELEGATE_TYPE.get(metadata.settings()); if (Strings.hasLength(delegateType) == false) { throw new IllegalArgumentException(DELEGATE_TYPE.getKey() + " must be set"); } Repository.Factory factory = typeLookup.apply(delegateType); return new SourceOnlySnapshotRepository( - factory.create( - projectId, - new RepositoryMetadata(metadata.name(), delegateType, metadata.settings()), - typeLookup, - snapshotMetrics - ) + factory.create(projectId, new RepositoryMetadata(metadata.name(), delegateType, metadata.settings()), typeLookup) ); } }; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackPlugin.java index 23ca86d48bd1e..bb0a21a6afd2a 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackPlugin.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackPlugin.java @@ -80,6 +80,7 @@ import org.elasticsearch.protocol.xpack.XPackUsageRequest; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestHandler; import org.elasticsearch.snapshots.sourceonly.SourceOnlySnapshotRepository; @@ -441,7 
+442,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Collections.singletonMap("source", SourceOnlySnapshotRepository.newRepositoryFactory()); } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java index 828abf4095d86..18256894f4430 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java @@ -81,6 +81,7 @@ import org.elasticsearch.plugins.interceptor.RestServerActionPlugin; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestHandler; import org.elasticsearch.rest.RestHeaderDefinition; @@ -512,14 +513,31 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { HashMap repositories = new HashMap<>( - super.getRepositories(env, namedXContentRegistry, clusterService, bigArrays, recoverySettings, repositoriesMetrics) + super.getRepositories( + env, + namedXContentRegistry, + clusterService, + bigArrays, + recoverySettings, + repositoriesMetrics, + snapshotMetrics + ) ); filterPlugins(RepositoryPlugin.class).forEach( r -> repositories.putAll( - r.getRepositories(env, namedXContentRegistry, clusterService, bigArrays, recoverySettings, RepositoriesMetrics.NOOP) + r.getRepositories( + env, + 
namedXContentRegistry, + clusterService, + bigArrays, + recoverySettings, + RepositoriesMetrics.NOOP, + SnapshotMetrics.NOOP + ) ) ); return repositories; diff --git a/x-pack/plugin/old-lucene-versions/src/internalClusterTest/java/org/elasticsearch/xpack/lucene/bwc/AbstractArchiveTestCase.java b/x-pack/plugin/old-lucene-versions/src/internalClusterTest/java/org/elasticsearch/xpack/lucene/bwc/AbstractArchiveTestCase.java index 614036d3792ca..5030956c8e063 100644 --- a/x-pack/plugin/old-lucene-versions/src/internalClusterTest/java/org/elasticsearch/xpack/lucene/bwc/AbstractArchiveTestCase.java +++ b/x-pack/plugin/old-lucene-versions/src/internalClusterTest/java/org/elasticsearch/xpack/lucene/bwc/AbstractArchiveTestCase.java @@ -25,6 +25,7 @@ import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryData; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.fs.FsRepository; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; import org.elasticsearch.snapshots.SnapshotId; @@ -63,7 +64,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Map.of( FAKE_VERSIONS_TYPE, diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java index 237e2387d0a4f..f5a7ba91f87c5 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java +++ 
b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java @@ -378,7 +378,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Collections.singletonMap( TYPE, diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPrewarmingIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPrewarmingIntegTests.java index dcf77d06e323a..d3f1407c04136 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPrewarmingIntegTests.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/cache/full/SearchableSnapshotsPrewarmingIntegTests.java @@ -45,6 +45,7 @@ import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryData; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.repositories.blobstore.ESBlobStoreRepositoryIntegTestCase; import org.elasticsearch.repositories.fs.FsRepository; @@ -450,7 +451,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Collections.singletonMap( "tracking", diff --git 
a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/recovery/SearchableSnapshotRecoveryStateIntegrationTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/recovery/SearchableSnapshotRecoveryStateIntegrationTests.java index e0674c0151f65..69e66110aeab5 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/recovery/SearchableSnapshotRecoveryStateIntegrationTests.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/recovery/SearchableSnapshotRecoveryStateIntegrationTests.java @@ -29,6 +29,7 @@ import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryData; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.repositories.blobstore.ESBlobStoreRepositoryIntegTestCase; import org.elasticsearch.repositories.fs.FsRepository; @@ -244,7 +245,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Collections.singletonMap( "test-fs", diff --git a/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMHealthBlockedSnapshotIT.java b/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMHealthBlockedSnapshotIT.java index 219dd20f4e620..5348ddbcbc655 100644 --- a/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMHealthBlockedSnapshotIT.java +++ b/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMHealthBlockedSnapshotIT.java @@ -31,6 +31,7 @@ 
import org.elasticsearch.plugins.RepositoryPlugin; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.SnapshotShardContext; import org.elasticsearch.repositories.fs.FsRepository; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; @@ -116,7 +117,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Map.of( TestDelayedRepo.TYPE, diff --git a/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMStatDisruptionIT.java b/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMStatDisruptionIT.java index d3eff9eb8585e..206bf58423d37 100644 --- a/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMStatDisruptionIT.java +++ b/x-pack/plugin/slm/src/internalClusterTest/java/org/elasticsearch/xpack/slm/SLMStatDisruptionIT.java @@ -33,6 +33,7 @@ import org.elasticsearch.repositories.FinalizeSnapshotContext; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.SnapshotShardContext; import org.elasticsearch.repositories.fs.FsRepository; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; @@ -134,7 +135,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Map.of( TestDelayedRepo.TYPE, @@ -206,7 +208,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings 
recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Map.of( TestRestartBeforeListenersRepo.TYPE, diff --git a/x-pack/plugin/snapshot-based-recoveries/src/internalClusterTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/SnapshotBasedIndexRecoveryIT.java b/x-pack/plugin/snapshot-based-recoveries/src/internalClusterTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/SnapshotBasedIndexRecoveryIT.java index 6f73efb2fa095..1eda191f60b35 100644 --- a/x-pack/plugin/snapshot-based-recoveries/src/internalClusterTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/SnapshotBasedIndexRecoveryIT.java +++ b/x-pack/plugin/snapshot-based-recoveries/src/internalClusterTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/SnapshotBasedIndexRecoveryIT.java @@ -57,6 +57,7 @@ import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.RepositoriesService; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.repositories.fs.FsRepository; import org.elasticsearch.search.SearchHit; @@ -148,7 +149,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Map.of( FAULTY_TYPE, diff --git a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java index a347cae9f0954..8381645279db0 100644 --- 
a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java +++ b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java @@ -543,7 +543,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Map.of( DISRUPTABLE_REPO_TYPE, diff --git a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java index 6837684e71611..3af479ea50ee0 100644 --- a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java +++ b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java @@ -185,7 +185,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Map.of( ASSERTING_REPO_TYPE, diff --git a/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java b/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java index 92297f7585128..88a36c2b7aa43 100644 --- 
a/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java +++ b/x-pack/plugin/voting-only-node/src/internalClusterTest/java/org/elasticsearch/cluster/coordination/votingonly/VotingOnlyNodePluginTests.java @@ -26,6 +26,7 @@ import org.elasticsearch.plugins.RepositoryPlugin; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.fs.FsRepository; import org.elasticsearch.snapshots.SnapshotInfo; import org.elasticsearch.snapshots.SnapshotState; @@ -261,7 +262,8 @@ public Map getRepositories( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - RepositoriesMetrics repositoriesMetrics + RepositoriesMetrics repositoriesMetrics, + SnapshotMetrics snapshotMetrics ) { return Collections.singletonMap( "verifyaccess-fs", From e2665d1bb3219c9057bc395890a46fcd3ea06c1f Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 3 Jul 2025 16:47:55 +1000 Subject: [PATCH 22/65] URLRepository --- .../plugin/repository/url/URLRepositoryPlugin.java | 3 ++- .../repositories/url/URLRepository.java | 14 +++----------- .../repositories/url/URLRepositoryTests.java | 4 +++- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java b/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java index ee01a25fcf331..1ca70376a7917 100644 --- a/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java +++ b/modules/repository-url/src/main/java/org/elasticsearch/plugin/repository/url/URLRepositoryPlugin.java @@ -63,7 +63,8 @@ public Map getRepositories( clusterService, bigArrays, recoverySettings, - httpClientFactory.updateAndGet(factory -> factory 
== null ? new URLHttpClient.Factory() : factory) + httpClientFactory.updateAndGet(factory -> factory == null ? new URLHttpClient.Factory() : factory), + snapshotMetrics ) ); } diff --git a/modules/repository-url/src/main/java/org/elasticsearch/repositories/url/URLRepository.java b/modules/repository-url/src/main/java/org/elasticsearch/repositories/url/URLRepository.java index 3a6f2268f2878..1cc6917562504 100644 --- a/modules/repository-url/src/main/java/org/elasticsearch/repositories/url/URLRepository.java +++ b/modules/repository-url/src/main/java/org/elasticsearch/repositories/url/URLRepository.java @@ -101,18 +101,10 @@ public URLRepository( ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - URLHttpClient.Factory httpClientFactory + URLHttpClient.Factory httpClientFactory, + SnapshotMetrics snapshotMetrics ) { - super( - projectId, - metadata, - namedXContentRegistry, - clusterService, - bigArrays, - recoverySettings, - BlobPath.EMPTY, - SnapshotMetrics.NOOP - ); + super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, BlobPath.EMPTY, snapshotMetrics); if (URL_SETTING.exists(metadata.settings()) == false && REPOSITORIES_URL_SETTING.exists(environment.settings()) == false) { throw new RepositoryException(metadata.name(), "missing url"); diff --git a/modules/repository-url/src/test/java/org/elasticsearch/repositories/url/URLRepositoryTests.java b/modules/repository-url/src/test/java/org/elasticsearch/repositories/url/URLRepositoryTests.java index 34c360550a11b..11c10cf4a8dee 100644 --- a/modules/repository-url/src/test/java/org/elasticsearch/repositories/url/URLRepositoryTests.java +++ b/modules/repository-url/src/test/java/org/elasticsearch/repositories/url/URLRepositoryTests.java @@ -19,6 +19,7 @@ import org.elasticsearch.env.TestEnvironment; import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.RepositoryException; +import 
org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreTestUtil; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.NamedXContentRegistry; @@ -45,7 +46,8 @@ private URLRepository createRepository(Settings baseSettings, RepositoryMetadata BlobStoreTestUtil.mockClusterService(), MockBigArrays.NON_RECYCLING_INSTANCE, new RecoverySettings(baseSettings, new ClusterSettings(baseSettings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)), - mock(URLHttpClient.Factory.class) + mock(URLHttpClient.Factory.class), + SnapshotMetrics.NOOP ); assertThat(repository.getProjectId(), equalTo(projectId)); return repository; From 23e5ca892d2e19f47ff543143a0bd551d140bfc9 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 3 Jul 2025 16:50:01 +1000 Subject: [PATCH 23/65] HdfsRepository --- .../repositories/hdfs/HdfsPlugin.java | 3 ++- .../repositories/hdfs/HdfsRepository.java | 14 +++----------- .../hdfs/HdfsBlobStoreContainerTests.java | 4 +++- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java b/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java index a5d1385da8a16..4ed6aa5d4164f 100644 --- a/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java +++ b/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsPlugin.java @@ -80,7 +80,8 @@ public Map getRepositories( namedXContentRegistry, clusterService, bigArrays, - recoverySettings + recoverySettings, + snapshotMetrics ) ); } diff --git a/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsRepository.java b/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsRepository.java index 19c7a2366b2b5..00b0bb92c6624 100644 --- a/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsRepository.java +++
b/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsRepository.java @@ -64,18 +64,10 @@ public HdfsRepository( NamedXContentRegistry namedXContentRegistry, ClusterService clusterService, BigArrays bigArrays, - RecoverySettings recoverySettings + RecoverySettings recoverySettings, + SnapshotMetrics snapshotMetrics ) { - super( - projectId, - metadata, - namedXContentRegistry, - clusterService, - bigArrays, - recoverySettings, - BlobPath.EMPTY, - SnapshotMetrics.NOOP - ); + super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, BlobPath.EMPTY, snapshotMetrics); this.environment = environment; this.chunkSize = metadata.settings().getAsBytesSize("chunk_size", null); diff --git a/plugins/repository-hdfs/src/test/java/org/elasticsearch/repositories/hdfs/HdfsBlobStoreContainerTests.java b/plugins/repository-hdfs/src/test/java/org/elasticsearch/repositories/hdfs/HdfsBlobStoreContainerTests.java index d0b84021e704c..3326f4b41033a 100644 --- a/plugins/repository-hdfs/src/test/java/org/elasticsearch/repositories/hdfs/HdfsBlobStoreContainerTests.java +++ b/plugins/repository-hdfs/src/test/java/org/elasticsearch/repositories/hdfs/HdfsBlobStoreContainerTests.java @@ -30,6 +30,7 @@ import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.env.Environment; import org.elasticsearch.indices.recovery.RecoverySettings; +import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.repositories.blobstore.BlobStoreTestUtil; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.fixtures.hdfs.HdfsClientThreadLeakFilter; @@ -126,7 +127,8 @@ public void testRepositoryProjectId() { NamedXContentRegistry.EMPTY, BlobStoreTestUtil.mockClusterService(), MockBigArrays.NON_RECYCLING_INSTANCE, - new RecoverySettings(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)) + new RecoverySettings(Settings.EMPTY, new 
ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)), + SnapshotMetrics.NOOP ); assertThat(repository.getProjectId(), equalTo(projectId)); } From 9a0e0ec91ac3158b570f2d9324956502bb15706d Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 3 Jul 2025 16:52:37 +1000 Subject: [PATCH 24/65] Tidy --- .../repositories/InvalidRepositoryIT.java | 17 +++++------------ .../MetadataLoadingDuringSnapshotRestoreIT.java | 17 +++++------------ .../elasticsearch/node/NodeConstruction.java | 2 +- .../repositories/RepositoriesService.java | 2 -- .../snapshots/SnapshotsService.java | 1 - ...archableSnapshotDiskThresholdIntegTests.java | 17 +++++------------ .../analyze/RepositoryAnalysisFailureIT.java | 8 +++++--- .../analyze/RepositoryAnalysisSuccessIT.java | 8 +++++--- 8 files changed, 26 insertions(+), 46 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java index 720eb772b131c..a560063bdeae8 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/InvalidRepositoryIT.java @@ -57,18 +57,10 @@ public UnstableRepository( NamedXContentRegistry namedXContentRegistry, ClusterService clusterService, BigArrays bigArrays, - RecoverySettings recoverySettings + RecoverySettings recoverySettings, + SnapshotMetrics snapshotMetrics ) { - super( - projectId, - metadata, - environment, - namedXContentRegistry, - clusterService, - bigArrays, - recoverySettings, - SnapshotMetrics.NOOP - ); + super(projectId, metadata, environment, namedXContentRegistry, clusterService, bigArrays, recoverySettings, snapshotMetrics); List unstableNodes = UNSTABLE_NODES.get(metadata.settings()); if (unstableNodes.contains(clusterService.getNodeName())) { throw new RepositoryException(metadata.name(), "Failed to create 
repository: current node is not stable"); @@ -95,7 +87,8 @@ public Map getRepositories( namedXContentRegistry, clusterService, bigArrays, - recoverySettings + recoverySettings, + snapshotMetrics ) ); } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java index f58382233550c..78a787d493d19 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/MetadataLoadingDuringSnapshotRestoreIT.java @@ -184,18 +184,10 @@ public CountingMockRepository( final NamedXContentRegistry namedXContentRegistry, ClusterService clusterService, BigArrays bigArrays, - RecoverySettings recoverySettings + RecoverySettings recoverySettings, + SnapshotMetrics snapshotMetrics ) { - super( - projectId, - metadata, - environment, - namedXContentRegistry, - clusterService, - bigArrays, - recoverySettings, - SnapshotMetrics.NOOP - ); + super(projectId, metadata, environment, namedXContentRegistry, clusterService, bigArrays, recoverySettings, snapshotMetrics); } @Override @@ -236,7 +228,8 @@ public Map getRepositories( namedXContentRegistry, clusterService, bigArrays, - recoverySettings + recoverySettings, + snapshotMetrics ) ); } diff --git a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java index 1362033a67d3f..658503d249470 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java +++ b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java @@ -729,7 +729,7 @@ private void construct( BigArrays bigArrays = serviceProvider.newBigArrays(pluginsService, pageCacheRecycler, circuitBreakerService); final RecoverySettings recoverySettings = new RecoverySettings(settings, 
settingsModule.getClusterSettings()); - SnapshotMetrics snapshotMetrics = new SnapshotMetrics(telemetryProvider.getMeterRegistry()); + final SnapshotMetrics snapshotMetrics = new SnapshotMetrics(telemetryProvider.getMeterRegistry()); RepositoriesModule repositoriesModule = new RepositoriesModule( environment, pluginsService.filterPlugins(RepositoryPlugin.class).toList(), diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java index aba8d9f03d290..b93e7facf2ce7 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java @@ -124,7 +124,6 @@ public class RepositoriesService extends AbstractLifecycleComponent implements C private final RepositoriesStatsArchive repositoriesStatsArchive; private final List> preRestoreChecks; - private final SnapshotMetrics snapshotMetrics; @SuppressWarnings("this-escape") public RepositoriesService( @@ -155,7 +154,6 @@ public RepositoriesService( threadPool.relativeTimeInMillisSupplier() ); this.preRestoreChecks = preRestoreChecks; - this.snapshotMetrics = snapshotMetrics; snapshotMetrics.createSnapshotShardsInProgressMetric(this::getShardSnapshotsInProgress); } diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index bced678a731a1..1367d6fa35792 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -4359,7 +4359,6 @@ private Collection getSnapshotsInProgress() { if (repositoriesMetadata != null) { repositoriesMetadata.repositories().forEach(repository -> { int snapshotCount = snapshotsInProgress.forRepo(projectId, repository.name()).size(); - logger.info("Returning snapshot count of {}", snapshotCount); 
snapshotsInProgressMetrics.add( new LongWithAttributes(snapshotCount, SnapshotMetrics.createAttributesMap(projectId, repository)) ); diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java index f5a7ba91f87c5..3e71b161489a1 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/allocation/SearchableSnapshotDiskThresholdIntegTests.java @@ -390,7 +390,8 @@ public Map getRepositories( namedXContentRegistry, clusterService, bigArrays, - recoverySettings + recoverySettings, + snapshotMetrics ) ); } @@ -407,18 +408,10 @@ public CustomMockRepository( NamedXContentRegistry namedXContentRegistry, ClusterService clusterService, BigArrays bigArrays, - RecoverySettings recoverySettings + RecoverySettings recoverySettings, + SnapshotMetrics snapshotMetrics ) { - super( - projectId, - metadata, - environment, - namedXContentRegistry, - clusterService, - bigArrays, - recoverySettings, - SnapshotMetrics.NOOP - ); + super(projectId, metadata, environment, namedXContentRegistry, clusterService, bigArrays, recoverySettings, snapshotMetrics); } private void unlockRestore() { diff --git a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java index 8381645279db0..595296df58b92 100644 --- 
a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java +++ b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisFailureIT.java @@ -555,7 +555,8 @@ public Map getRepositories( clusterService, bigArrays, recoverySettings, - BlobPath.EMPTY + BlobPath.EMPTY, + snapshotMetrics ) ); } @@ -572,9 +573,10 @@ static class DisruptableRepository extends BlobStoreRepository { ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - BlobPath basePath + BlobPath basePath, + SnapshotMetrics snapshotMetrics ) { - super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, basePath, SnapshotMetrics.NOOP); + super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, basePath, snapshotMetrics); } void setBlobStore(BlobStore blobStore) { diff --git a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java index 3af479ea50ee0..f93c4018b6232 100644 --- a/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java +++ b/x-pack/plugin/snapshot-repo-test-kit/src/internalClusterTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/RepositoryAnalysisSuccessIT.java @@ -197,7 +197,8 @@ public Map getRepositories( clusterService, bigArrays, recoverySettings, - buildBlobPath(metadata.settings()) + buildBlobPath(metadata.settings()), + snapshotMetrics ) ); } @@ -227,9 +228,10 @@ static class AssertingRepository extends BlobStoreRepository { 
ClusterService clusterService, BigArrays bigArrays, RecoverySettings recoverySettings, - BlobPath basePath + BlobPath basePath, + SnapshotMetrics snapshotMetrics ) { - super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, basePath, SnapshotMetrics.NOOP); + super(projectId, metadata, namedXContentRegistry, clusterService, bigArrays, recoverySettings, basePath, snapshotMetrics); } void setBlobStore(BlobStore blobStore) { From 3ba5832038baab2ed9a4cb4af8263b3c10cc01b1 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 3 Jul 2025 17:44:06 +1000 Subject: [PATCH 25/65] Publish new stats via node stats --- ...IT.java => RepositorySnapshotStatsIT.java} | 4 +- .../org/elasticsearch/TransportVersions.java | 2 + .../repositories/RepositoriesService.java | 8 +- .../repositories/RepositoriesStats.java | 87 +++++++++++++++++-- .../repositories/Repository.java | 4 + .../blobstore/BlobStoreRepository.java | 6 ++ .../blobstore/BlobStoreSnapshotMetrics.java | 15 ++++ .../cluster/node/stats/NodeStatsTests.java | 2 +- 8 files changed, 111 insertions(+), 17 deletions(-) rename server/src/internalClusterTest/java/org/elasticsearch/snapshots/{RepositoryThrottlingStatsIT.java => RepositorySnapshotStatsIT.java} (96%) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositoryThrottlingStatsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java similarity index 96% rename from server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositoryThrottlingStatsIT.java rename to server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java index b0563bc86b122..3baaca58f0de9 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositoryThrottlingStatsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java @@ -24,9 +24,9 @@ import static 
org.hamcrest.Matchers.greaterThan; @ESIntegTestCase.ClusterScope(numDataNodes = 0, scope = ESIntegTestCase.Scope.TEST) -public class RepositoryThrottlingStatsIT extends AbstractSnapshotIntegTestCase { +public class RepositorySnapshotStatsIT extends AbstractSnapshotIntegTestCase { - public void testRepositoryThrottlingStats() throws Exception { + public void testRepositorySnapshotStats() throws Exception { logger.info("--> starting a node"); internalCluster().startNode(); diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 486804a1f57ee..bd7235b55688b 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -329,6 +329,8 @@ static TransportVersion def(int id) { public static final TransportVersion PROJECT_STATE_REGISTRY_RECORDS_DELETIONS = def(9_113_0_00); public static final TransportVersion ESQL_SERIALIZE_TIMESERIES_FIELD_TYPE = def(9_114_0_00); public static final TransportVersion ML_INFERENCE_IBM_WATSONX_COMPLETION_ADDED = def(9_115_0_00); + public static final TransportVersion EXTENDED_SNAPSHOT_STATS_IN_NODE_INFO = def(9_116_0_00); + /* * STOP! READ THIS FIRST! 
No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java index b93e7facf2ce7..c4795004af705 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java @@ -905,13 +905,7 @@ public List repositoriesStats() { public RepositoriesStats getRepositoriesThrottlingStats() { return new RepositoriesStats( - getRepositories().stream() - .collect( - Collectors.toMap( - r -> r.getMetadata().name(), - r -> new RepositoriesStats.ThrottlingStats(r.getRestoreThrottleTimeInNanos(), r.getSnapshotThrottleTimeInNanos()) - ) - ) + getRepositories().stream().collect(Collectors.toMap(r -> r.getMetadata().name(), Repository::getSnapshotStats)) ); } diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java index 418fd0ee626c7..2b05ed62e8a47 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java @@ -13,6 +13,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.core.TimeValue; import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.ToXContentObject; @@ -26,17 +27,17 @@ public class RepositoriesStats implements Writeable, ToXContentFragment { - private final Map repositoryThrottlingStats; + private final Map repositoryThrottlingStats; public RepositoriesStats(StreamInput in) throws IOException { if 
(in.getTransportVersion().onOrAfter(TransportVersions.V_8_9_X)) { - repositoryThrottlingStats = in.readMap(ThrottlingStats::new); + repositoryThrottlingStats = in.readMap(SnapshotStats::readFrom); } else { repositoryThrottlingStats = new HashMap<>(); } } - public RepositoriesStats(Map repositoryThrottlingStats) { + public RepositoriesStats(Map repositoryThrottlingStats) { this.repositoryThrottlingStats = new HashMap<>(repositoryThrottlingStats); } @@ -53,14 +54,44 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return builder; } - public Map getRepositoryThrottlingStats() { + public Map getRepositoryThrottlingStats() { return Collections.unmodifiableMap(repositoryThrottlingStats); } - public record ThrottlingStats(long totalReadThrottledNanos, long totalWriteThrottledNanos) implements ToXContentObject, Writeable { + public record SnapshotStats( + long shardSnapshotsStarted, + long shardSnapshotsCompleted, + long shardSnapshotsInProgress, + long totalReadThrottledNanos, + long totalWriteThrottledNanos, + long numberOfBlobsUploaded, + long numberOfBytesUploaded, + long totalUploadTimeInNanos, + long totalUploadReadTimeInNanos + ) implements ToXContentObject, Writeable { - ThrottlingStats(StreamInput in) throws IOException { - this(in.readVLong(), in.readVLong()); + public static SnapshotStats readFrom(StreamInput in) throws IOException { + final long totalReadThrottledNanos = in.readVLong(); + final long totalWriteThrottledNanos = in.readVLong(); + if (in.getTransportVersion().onOrAfter(TransportVersions.EXTENDED_SNAPSHOT_STATS_IN_NODE_INFO)) { + return new SnapshotStats( + in.readLong(), + in.readLong(), + in.readLong(), + totalReadThrottledNanos, + totalWriteThrottledNanos, + in.readLong(), + in.readLong(), + in.readLong(), + in.readLong() + ); + } else { + return new SnapshotStats(totalReadThrottledNanos, totalWriteThrottledNanos); + } + } + + public SnapshotStats(long totalReadThrottledNanos, long totalWriteThrottledNanos) { + 
this(-1, -1, -1, totalReadThrottledNanos, totalWriteThrottledNanos, -1, -1, -1, -1); } @Override @@ -72,6 +103,39 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } builder.field("total_read_throttled_time_nanos", totalReadThrottledNanos); builder.field("total_write_throttled_time_nanos", totalWriteThrottledNanos); + if (shardSnapshotsStarted != -1) { + builder.field("shard_snapshots_started", shardSnapshotsStarted); + } + if (shardSnapshotsCompleted != -1) { + builder.field("shard_snapshots_completed", shardSnapshotsCompleted); + } + if (shardSnapshotsInProgress != -1) { + builder.field("shard_snapshots_in_progress", shardSnapshotsInProgress); + } + if (numberOfBlobsUploaded != -1) { + builder.field("blobs_uploaded", numberOfBlobsUploaded); + } + if (numberOfBytesUploaded != -1) { + if (builder.humanReadable()) { + builder.field("bytes_uploaded", ByteSizeValue.ofBytes(numberOfBytesUploaded)); + } else { + builder.field("bytes_uploaded", numberOfBytesUploaded); + } + } + if (totalUploadTimeInNanos != -1) { + if (builder.humanReadable()) { + builder.field("total_upload_time", TimeValue.timeValueNanos(totalUploadTimeInNanos)); + } else { + builder.field("total_upload_time_in_nanos", totalUploadTimeInNanos); + } + } + if (totalUploadReadTimeInNanos != -1) { + if (builder.humanReadable()) { + builder.field("total_read_time", TimeValue.timeValueNanos(totalUploadReadTimeInNanos)); + } else { + builder.field("total_read_time_in_nanos", totalUploadReadTimeInNanos); + } + } builder.endObject(); return builder; } @@ -80,6 +144,15 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws public void writeTo(StreamOutput out) throws IOException { out.writeVLong(totalReadThrottledNanos); out.writeVLong(totalWriteThrottledNanos); + if (out.getTransportVersion().onOrAfter(TransportVersions.EXTENDED_SNAPSHOT_STATS_IN_NODE_INFO)) { + out.writeLong(shardSnapshotsStarted); + out.writeLong(shardSnapshotsCompleted); + 
out.writeLong(shardSnapshotsInProgress); + out.writeLong(numberOfBlobsUploaded); + out.writeLong(numberOfBytesUploaded); + out.writeLong(totalUploadTimeInNanos); + out.writeLong(totalUploadReadTimeInNanos); + } } } } diff --git a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index 37bc299e3d9e2..2ff9a56d23f05 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -356,4 +356,8 @@ static boolean assertSnapshotMetaThread() { default LongWithAttributes getShardSnapshotsInProgress() { return null; } + + default RepositoriesStats.SnapshotStats getSnapshotStats() { + return new RepositoriesStats.SnapshotStats(getRestoreThrottleTimeInNanos(), getSnapshotThrottleTimeInNanos()); + } } diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index d77f96b27bf28..e9e78b7c757f3 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -110,6 +110,7 @@ import org.elasticsearch.repositories.IndexId; import org.elasticsearch.repositories.IndexMetaDataGenerations; import org.elasticsearch.repositories.RepositoriesService; +import org.elasticsearch.repositories.RepositoriesStats; import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryData; import org.elasticsearch.repositories.RepositoryData.SnapshotDetails; @@ -4226,4 +4227,9 @@ protected Set getExtraUsageFeatures() { public LongWithAttributes getShardSnapshotsInProgress() { return blobStoreSnapshotMetrics.getShardSnapshotsInProgress(); } + + @Override + public RepositoriesStats.SnapshotStats getSnapshotStats() { + return 
blobStoreSnapshotMetrics.getSnapshotStats(); + } } diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java index cfad743d88805..6525e9f232991 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java @@ -13,6 +13,7 @@ import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.common.metrics.CounterMetric; import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus; +import org.elasticsearch.repositories.RepositoriesStats; import org.elasticsearch.repositories.SnapshotMetrics; import org.elasticsearch.telemetry.metric.LongWithAttributes; @@ -90,4 +91,18 @@ public void incrementUploadReadTime(long readTimeInNanos) { public LongWithAttributes getShardSnapshotsInProgress() { return new LongWithAttributes(shardSnapshotsInProgress.count(), metricAttributes); } + + public RepositoriesStats.SnapshotStats getSnapshotStats() { + return new RepositoriesStats.SnapshotStats( + numberOfShardSnapshotsStarted.count(), + numberOfShardSnapshotsCompleted.count(), + shardSnapshotsInProgress.count(), + restoreRateLimitingTimeInNanos.count(), + snapshotRateLimitingTimeInNanos.count(), + numberOfBlobsUploaded.count(), + numberOfBytesUploaded.count(), + uploadTimeInNanos.count(), + uploadReadTimeInNanos.count() + ); + } } diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java index e8bbe412cbbab..86ec8bdab7c20 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -1069,7 +1069,7 @@ 
public static NodeStats createNodeStats() { ); } RepositoriesStats repositoriesStats = new RepositoriesStats( - Map.of("test-repository", new RepositoriesStats.ThrottlingStats(100, 200)) + Map.of("test-repository", new RepositoriesStats.SnapshotStats(100, 200)) ); NodeAllocationStats nodeAllocationStats = new NodeAllocationStats( randomIntBetween(0, 10000), From 3bd6084cc355a8e2bb2cb16781595f338782d6c8 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Fri, 4 Jul 2025 10:20:57 +1000 Subject: [PATCH 26/65] Test for presence of additional status in node stats payload --- .../snapshots/RepositorySnapshotStatsIT.java | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java index 3baaca58f0de9..51a1d326e2881 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java @@ -21,18 +21,23 @@ import java.util.Collections; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.lessThan; @ESIntegTestCase.ClusterScope(numDataNodes = 0, scope = ESIntegTestCase.Scope.TEST) public class RepositorySnapshotStatsIT extends AbstractSnapshotIntegTestCase { - public void testRepositorySnapshotStats() throws Exception { + public void testRepositorySnapshotStats() { logger.info("--> starting a node"); internalCluster().startNode(); logger.info("--> create index"); - createIndexWithRandomDocs("test-idx", 100); + final int numberOfShards = randomIntBetween(2, 6); + createIndex("test-idx", numberOfShards, 0); + ensureGreen(); + indexRandomDocs("test-idx", 100); IndicesStatsResponse indicesStats = indicesAdmin().prepareStats("test-idx").get(); IndexStats 
indexStats = indicesStats.getIndex("test-idx"); @@ -70,9 +75,18 @@ public void testRepositorySnapshotStats() throws Exception { NodesStatsResponse response = clusterAdmin().prepareNodesStats().setRepositoryStats(true).get(); RepositoriesStats stats = response.getNodes().get(0).getRepositoriesStats(); + // These are just broad sanity checks on the values. There are more detailed checks in SnapshotMetricsIT assertTrue(stats.getRepositoryThrottlingStats().containsKey("test-repo")); - assertTrue(stats.getRepositoryThrottlingStats().get("test-repo").totalWriteThrottledNanos() > 0); - assertTrue(stats.getRepositoryThrottlingStats().get("test-repo").totalReadThrottledNanos() > 0); - + RepositoriesStats.SnapshotStats snapshotStats = stats.getRepositoryThrottlingStats().get("test-repo"); + assertThat(snapshotStats.totalWriteThrottledNanos(), greaterThan(0L)); + assertThat(snapshotStats.totalReadThrottledNanos(), greaterThan(0L)); + assertThat(snapshotStats.shardSnapshotsStarted(), equalTo((long) numberOfShards)); + assertThat(snapshotStats.shardSnapshotsCompleted(), equalTo((long) numberOfShards)); + assertThat(snapshotStats.shardSnapshotsInProgress(), equalTo(0L)); + assertThat(snapshotStats.numberOfBlobsUploaded(), greaterThan(0L)); + assertThat(snapshotStats.numberOfBytesUploaded(), greaterThan(0L)); + assertThat(snapshotStats.totalUploadTimeInNanos(), greaterThan(0L)); + assertThat(snapshotStats.totalUploadReadTimeInNanos(), greaterThan(0L)); + assertThat(snapshotStats.totalUploadReadTimeInNanos(), lessThan(snapshotStats.totalUploadTimeInNanos())); } } From f5efceec030d05e638af246c98d14aaf9a89a0cb Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Fri, 4 Jul 2025 11:33:40 +1000 Subject: [PATCH 27/65] Handle null project ID --- .../org/elasticsearch/repositories/SnapshotMetrics.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java 
b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java index 9ba3c52aab095..c61222c08bc99 100644 --- a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java @@ -11,6 +11,7 @@ import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.RepositoryMetadata; +import org.elasticsearch.core.Nullable; import org.elasticsearch.telemetry.metric.DoubleHistogram; import org.elasticsearch.telemetry.metric.LongCounter; import org.elasticsearch.telemetry.metric.LongWithAttributes; @@ -84,7 +85,11 @@ public void createSnapshotsInProgressMetric(Supplier createAttributesMap(ProjectId projectId, RepositoryMetadata meta) { - return Map.of("project_id", projectId.id(), "repo_type", meta.type(), "repo_name", meta.name()); + public static Map createAttributesMap(@Nullable ProjectId projectId, RepositoryMetadata meta) { + if (projectId == null) { + return Map.of("repo_type", meta.type(), "repo_name", meta.name()); + } else { + return Map.of("project_id", projectId.id(), "repo_type", meta.type(), "repo_name", meta.name()); + } } } From d05eb1739ff0e20cb27a2e5912959261b299e61b Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Fri, 4 Jul 2025 15:08:12 +1000 Subject: [PATCH 28/65] Add Nullable annotations to the projectId parameter --- .../elasticsearch/repositories/azure/AzureRepository.java | 3 ++- .../repositories/gcs/GoogleCloudStorageRepository.java | 3 ++- .../java/org/elasticsearch/repositories/s3/S3Repository.java | 3 ++- .../org/elasticsearch/repositories/url/URLRepository.java | 3 ++- .../org/elasticsearch/repositories/hdfs/HdfsRepository.java | 3 ++- .../repositories/blobstore/BlobStoreRepository.java | 3 ++- .../repositories/blobstore/BlobStoreSnapshotMetrics.java | 3 ++- .../repositories/blobstore/MeteredBlobStoreRepository.java | 3 ++- .../java/org/elasticsearch/repositories/fs/FsRepository.java | 5 +++-- 
.../LatencySimulatingBlobStoreRepository.java | 3 ++- .../elasticsearch/snapshots/mockstore/MockRepository.java | 3 ++- 11 files changed, 23 insertions(+), 12 deletions(-) diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepository.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepository.java index ed0de60dbd2e9..c3d46b10d92d3 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepository.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureRepository.java @@ -22,6 +22,7 @@ import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.core.Nullable; import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.RepositoriesMetrics; import org.elasticsearch.repositories.SnapshotMetrics; @@ -112,7 +113,7 @@ public static final class Repository { private final RepositoriesMetrics repositoriesMetrics; public AzureRepository( - final ProjectId projectId, + @Nullable final ProjectId projectId, final RepositoryMetadata metadata, final NamedXContentRegistry namedXContentRegistry, final AzureStorageService storageService, diff --git a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRepository.java b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRepository.java index b47aa830d2484..0a731e8710979 100644 --- a/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRepository.java +++ b/modules/repository-gcs/src/main/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageRepository.java @@ -21,6 +21,7 @@ import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import 
org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.RepositoryException; @@ -90,7 +91,7 @@ class GoogleCloudStorageRepository extends MeteredBlobStoreRepository { private final GcsRepositoryStatsCollector statsCollector; GoogleCloudStorageRepository( - final ProjectId projectId, + @Nullable final ProjectId projectId, final RepositoryMetadata metadata, final NamedXContentRegistry namedXContentRegistry, final GoogleCloudStorageService storageService, diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java index 1b1fb5d4e7aed..83a2c9d21d87c 100644 --- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java @@ -31,6 +31,7 @@ import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.concurrent.ListenableFuture; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; @@ -277,7 +278,7 @@ class S3Repository extends MeteredBlobStoreRepository { * Constructs an s3 backed repository */ S3Repository( - final ProjectId projectId, + @Nullable final ProjectId projectId, final RepositoryMetadata metadata, final NamedXContentRegistry namedXContentRegistry, final S3Service service, diff --git a/modules/repository-url/src/main/java/org/elasticsearch/repositories/url/URLRepository.java b/modules/repository-url/src/main/java/org/elasticsearch/repositories/url/URLRepository.java index 1cc6917562504..43ed19e73d7f8 100644 --- 
a/modules/repository-url/src/main/java/org/elasticsearch/repositories/url/URLRepository.java +++ b/modules/repository-url/src/main/java/org/elasticsearch/repositories/url/URLRepository.java @@ -25,6 +25,7 @@ import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.URIPattern; import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Nullable; import org.elasticsearch.env.Environment; import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.RepositoryException; @@ -94,7 +95,7 @@ public class URLRepository extends BlobStoreRepository { * Constructs a read-only URL-based repository */ public URLRepository( - ProjectId projectId, + @Nullable ProjectId projectId, RepositoryMetadata metadata, Environment environment, NamedXContentRegistry namedXContentRegistry, diff --git a/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsRepository.java b/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsRepository.java index 00b0bb92c6624..88188497ef90e 100644 --- a/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsRepository.java +++ b/plugins/repository-hdfs/src/main/java/org/elasticsearch/repositories/hdfs/HdfsRepository.java @@ -25,6 +25,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.env.Environment; import org.elasticsearch.indices.recovery.RecoverySettings; @@ -58,7 +59,7 @@ public final class HdfsRepository extends BlobStoreRepository { private final String pathSetting; public HdfsRepository( - ProjectId projectId, + @Nullable ProjectId projectId, RepositoryMetadata metadata, Environment environment, NamedXContentRegistry namedXContentRegistry, diff --git 
a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index d7b24a3a7a04b..ff1bcae629d5d 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -200,6 +200,7 @@ private class ShutdownLogger { private static final Logger shutdownLogger = LogManager.getLogger(ShutdownLogger.class); } + @Nullable private final ProjectId projectId; protected volatile RepositoryMetadata metadata; @@ -498,7 +499,7 @@ public static String getRepositoryDataBlobName(long repositoryGeneration) { */ @SuppressWarnings("this-escape") protected BlobStoreRepository( - final ProjectId projectId, + @Nullable final ProjectId projectId, final RepositoryMetadata metadata, final NamedXContentRegistry namedXContentRegistry, final ClusterService clusterService, diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java index 6525e9f232991..609506b087d32 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java @@ -12,6 +12,7 @@ import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.common.metrics.CounterMetric; +import org.elasticsearch.core.Nullable; import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus; import org.elasticsearch.repositories.RepositoriesStats; import org.elasticsearch.repositories.SnapshotMetrics; @@ -33,7 +34,7 @@ public class BlobStoreSnapshotMetrics { private final CounterMetric numberOfShardSnapshotsCompleted = new CounterMetric(); private final 
Map metricAttributes; - public BlobStoreSnapshotMetrics(ProjectId projectId, RepositoryMetadata repositoryMetadata, SnapshotMetrics snapshotMetrics) { + public BlobStoreSnapshotMetrics(@Nullable ProjectId projectId, RepositoryMetadata repositoryMetadata, SnapshotMetrics snapshotMetrics) { this.snapshotMetrics = snapshotMetrics; metricAttributes = SnapshotMetrics.createAttributesMap(projectId, repositoryMetadata); } diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/MeteredBlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/MeteredBlobStoreRepository.java index 478a936b6c8b9..3b226875de7ef 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/MeteredBlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/MeteredBlobStoreRepository.java @@ -15,6 +15,7 @@ import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.blobstore.BlobPath; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.core.Nullable; import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.RepositoryInfo; import org.elasticsearch.repositories.RepositoryStatsSnapshot; @@ -28,7 +29,7 @@ public abstract class MeteredBlobStoreRepository extends BlobStoreRepository { private final RepositoryInfo repositoryInfo; public MeteredBlobStoreRepository( - ProjectId projectId, + @Nullable ProjectId projectId, RepositoryMetadata metadata, NamedXContentRegistry namedXContentRegistry, ClusterService clusterService, diff --git a/server/src/main/java/org/elasticsearch/repositories/fs/FsRepository.java b/server/src/main/java/org/elasticsearch/repositories/fs/FsRepository.java index 7db886dd6bb2f..15b5e9a7d480e 100644 --- a/server/src/main/java/org/elasticsearch/repositories/fs/FsRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/fs/FsRepository.java @@ -21,6 +21,7 @@ import 
org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.core.Nullable; import org.elasticsearch.env.Environment; import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.RepositoryException; @@ -77,7 +78,7 @@ public class FsRepository extends BlobStoreRepository { * Constructs a shared file system repository. */ public FsRepository( - ProjectId projectId, + @Nullable ProjectId projectId, RepositoryMetadata metadata, Environment environment, NamedXContentRegistry namedXContentRegistry, @@ -92,7 +93,7 @@ public FsRepository( * Constructs a shared file system repository. */ public FsRepository( - ProjectId projectId, + @Nullable ProjectId projectId, RepositoryMetadata metadata, Environment environment, NamedXContentRegistry namedXContentRegistry, diff --git a/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepository.java b/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepository.java index a4126c41c0925..dbe648e49ee03 100644 --- a/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepository.java +++ b/test/external-modules/latency-simulating-directory/src/main/java/org/elasticsearch/test/simulatedlatencyrepo/LatencySimulatingBlobStoreRepository.java @@ -18,6 +18,7 @@ import org.elasticsearch.common.blobstore.OperationPurpose; import org.elasticsearch.common.blobstore.support.FilterBlobContainer; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.core.Nullable; import org.elasticsearch.env.Environment; import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.repositories.fs.FsRepository; @@ -31,7 +32,7 
@@ class LatencySimulatingBlobStoreRepository extends FsRepository { private final Runnable simulator; protected LatencySimulatingBlobStoreRepository( - ProjectId projectId, + @Nullable ProjectId projectId, RepositoryMetadata metadata, Environment env, NamedXContentRegistry namedXContentRegistry, diff --git a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java index e0d84de74dc53..6088dc00efc26 100644 --- a/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java +++ b/test/framework/src/main/java/org/elasticsearch/snapshots/mockstore/MockRepository.java @@ -34,6 +34,7 @@ import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.core.CheckedConsumer; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.PathUtils; import org.elasticsearch.env.Environment; import org.elasticsearch.indices.recovery.RecoverySettings; @@ -188,7 +189,7 @@ public long getFailureCount() { private volatile boolean failOnDeleteContainer = false; public MockRepository( - ProjectId projectId, + @Nullable ProjectId projectId, RepositoryMetadata metadata, Environment environment, NamedXContentRegistry namedXContentRegistry, From 0a19687092fa494f6838b6567f48731fa9e4c69e Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 7 Jul 2025 10:46:13 +0930 Subject: [PATCH 29/65] Populate attributes consistently, add shard stage and snapshot state --- .../repositories/SnapshotMetricsIT.java | 69 +++++++++++++++++++ .../blobstore/BlobStoreSnapshotMetrics.java | 22 +++--- .../snapshots/SnapshotsService.java | 11 +-- 3 files changed, 86 insertions(+), 16 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index 
f0d7f2a8bebad..e643c02551cb3 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -12,14 +12,20 @@ import org.elasticsearch.action.admin.indices.stats.IndexStats; import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; import org.elasticsearch.action.admin.indices.stats.ShardStats; +import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.CollectionUtils; +import org.elasticsearch.common.util.Maps; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.PluginsService; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; +import org.elasticsearch.snapshots.SnapshotState; +import org.elasticsearch.telemetry.InstrumentType; import org.elasticsearch.telemetry.Measurement; +import org.elasticsearch.telemetry.RecordingMeterRegistry; import org.elasticsearch.telemetry.TestTelemetryPlugin; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.threadpool.ThreadPool; @@ -27,17 +33,21 @@ import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.stream.Stream; import java.util.stream.StreamSupport; import static org.elasticsearch.threadpool.ThreadPool.ESTIMATED_TIME_INTERVAL_SETTING; import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.everyItem; import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasEntry; import static org.hamcrest.Matchers.hasItem; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.lessThan; 
+import static org.hamcrest.Matchers.not; @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST) public class SnapshotMetricsIT extends AbstractSnapshotIntegTestCase { @@ -170,6 +180,65 @@ public void testSnapshotAPMMetrics() throws Exception { // assert we throttled on restore assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_RESTORE_THROTTLE_DURATION), greaterThan(0L)); + + // assert appropriate attributes are present + final Map expectedAttrs = Map.of( + "project_id", + ProjectId.DEFAULT.id(), + "repo_name", + repositoryName, + "repo_type", + "mock" + ); + final Map expectedAttrsWithShardStage = Maps.copyMapWithAddedEntry( + expectedAttrs, + "stage", + IndexShardSnapshotStatus.Stage.DONE.name() + ); + final Map expectedAttrsWithSnapshotState = Maps.copyMapWithAddedEntry( + expectedAttrs, + "state", + SnapshotState.SUCCESS.name() + ); + assertMetricsHaveAttributes(InstrumentType.LONG_COUNTER, SnapshotMetrics.SNAPSHOTS_STARTED, expectedAttrs); + assertMetricsHaveAttributes(InstrumentType.LONG_GAUGE, SnapshotMetrics.SNAPSHOTS_IN_PROGRESS, expectedAttrs); + assertMetricsHaveAttributes(InstrumentType.LONG_COUNTER, SnapshotMetrics.SNAPSHOTS_COMPLETED, expectedAttrsWithSnapshotState); + assertMetricsHaveAttributes(InstrumentType.DOUBLE_HISTOGRAM, SnapshotMetrics.SNAPSHOT_DURATION, expectedAttrsWithSnapshotState); + + assertMetricsHaveAttributes(InstrumentType.LONG_COUNTER, SnapshotMetrics.SNAPSHOT_SHARDS_STARTED, expectedAttrs); + assertMetricsHaveAttributes(InstrumentType.LONG_GAUGE, SnapshotMetrics.SNAPSHOT_SHARDS_IN_PROGRESS, expectedAttrs); + assertMetricsHaveAttributes(InstrumentType.LONG_COUNTER, SnapshotMetrics.SNAPSHOT_SHARDS_COMPLETED, expectedAttrsWithShardStage); + assertMetricsHaveAttributes(InstrumentType.DOUBLE_HISTOGRAM, SnapshotMetrics.SNAPSHOT_SHARDS_DURATION, expectedAttrsWithShardStage); + + assertMetricsHaveAttributes(InstrumentType.LONG_COUNTER, SnapshotMetrics.SNAPSHOT_RESTORE_THROTTLE_DURATION, expectedAttrs); + 
assertMetricsHaveAttributes(InstrumentType.LONG_COUNTER, SnapshotMetrics.SNAPSHOT_CREATE_THROTTLE_DURATION, expectedAttrs); + assertMetricsHaveAttributes(InstrumentType.LONG_COUNTER, SnapshotMetrics.SNAPSHOT_UPLOAD_READ_DURATION, expectedAttrs); + assertMetricsHaveAttributes(InstrumentType.LONG_COUNTER, SnapshotMetrics.SNAPSHOT_UPLOAD_DURATION, expectedAttrs); + assertMetricsHaveAttributes(InstrumentType.LONG_COUNTER, SnapshotMetrics.SNAPSHOT_BYTES_UPLOADED, expectedAttrs); + assertMetricsHaveAttributes(InstrumentType.LONG_COUNTER, SnapshotMetrics.SNAPSHOT_BLOBS_UPLOADED, expectedAttrs); + } + + private static void assertMetricsHaveAttributes( + InstrumentType instrumentType, + String metricName, + Map expectedAttributes + ) { + final List clusterMeasurements = getClusterMeasurements(instrumentType, metricName); + assertThat(clusterMeasurements, not(empty())); + clusterMeasurements.forEach(recordingMetric -> { + for (Map.Entry entry : expectedAttributes.entrySet()) { + assertThat(recordingMetric.attributes(), hasEntry(entry.getKey(), entry.getValue())); + } + }); + } + + private static List getClusterMeasurements(InstrumentType instrumentType, String metricName) { + return allTestTelemetryPlugins().flatMap( + testTelemetryPlugin -> ((RecordingMeterRegistry) testTelemetryPlugin.getTelemetryProvider(Settings.EMPTY).getMeterRegistry()) + .getRecorder() + .getMeasurements(instrumentType, metricName) + .stream() + ).toList(); } private static void assertDoubleHistogramMetrics(String metricName, Matcher> matcher) { diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java index 609506b087d32..bb11580b67490 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java @@ -12,6 +12,7 @@ import 
org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.common.metrics.CounterMetric; +import org.elasticsearch.common.util.Maps; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus; import org.elasticsearch.repositories.RepositoriesStats; @@ -40,7 +41,7 @@ public BlobStoreSnapshotMetrics(@Nullable ProjectId projectId, RepositoryMetadat } public void incrementSnapshotRateLimitingTimeInNanos(long throttleTimeNanos) { - snapshotMetrics.snapshotCreateThrottleDurationCounter().incrementBy(throttleTimeNanos); + snapshotMetrics.snapshotCreateThrottleDurationCounter().incrementBy(throttleTimeNanos, metricAttributes); snapshotRateLimitingTimeInNanos.inc(throttleTimeNanos); } @@ -49,7 +50,7 @@ public long snapshotRateLimitingTimeInNanos() { } public void incrementRestoreRateLimitingTimeInNanos(long throttleTimeNanos) { - snapshotMetrics.snapshotRestoreThrottleDurationCounter().incrementBy(throttleTimeNanos); + snapshotMetrics.snapshotRestoreThrottleDurationCounter().incrementBy(throttleTimeNanos, metricAttributes); restoreRateLimitingTimeInNanos.inc(throttleTimeNanos); } @@ -58,34 +59,33 @@ public long restoreRateLimitingTimeInNanos() { } public void incrementCountersForPartUpload(long partSizeInBytes, long partWriteTimeNanos) { - snapshotMetrics.snapshotBytesUploadedCounter().incrementBy(partSizeInBytes); - snapshotMetrics.snapshotUploadDurationCounter().incrementBy(partWriteTimeNanos); + snapshotMetrics.snapshotBytesUploadedCounter().incrementBy(partSizeInBytes, metricAttributes); + snapshotMetrics.snapshotUploadDurationCounter().incrementBy(partWriteTimeNanos, metricAttributes); numberOfBytesUploaded.inc(partSizeInBytes); uploadTimeInNanos.inc(partWriteTimeNanos); } public void incrementNumberOfBlobsUploaded() { - snapshotMetrics.snapshotBlobsUploadedCounter().increment(); + snapshotMetrics.snapshotBlobsUploadedCounter().incrementBy(1, 
metricAttributes); numberOfBlobsUploaded.inc(); } public void shardSnapshotStarted() { - snapshotMetrics.snapshotsShardsStartedCounter().increment(); + snapshotMetrics.snapshotsShardsStartedCounter().incrementBy(1, metricAttributes); numberOfShardSnapshotsStarted.inc(); shardSnapshotsInProgress.inc(); } public void shardSnapshotCompleted(IndexShardSnapshotStatus status) { - snapshotMetrics.snapshotsShardsCompletedCounter().increment(); - if (status.getStage() == IndexShardSnapshotStatus.Stage.DONE) { - snapshotMetrics.snapshotShardsDurationHistogram().record(status.getTotalTime() / 1_000f); - } + final Map attrsWithStage = Maps.copyMapWithAddedEntry(metricAttributes, "stage", status.getStage().name()); + snapshotMetrics.snapshotsShardsCompletedCounter().incrementBy(1, attrsWithStage); + snapshotMetrics.snapshotShardsDurationHistogram().record(status.getTotalTime() / 1_000f, attrsWithStage); numberOfShardSnapshotsCompleted.inc(); shardSnapshotsInProgress.dec(); } public void incrementUploadReadTime(long readTimeInNanos) { - snapshotMetrics.snapshotUploadReadDurationCounter().incrementBy(readTimeInNanos); + snapshotMetrics.snapshotUploadReadDurationCounter().incrementBy(readTimeInNanos, metricAttributes); uploadReadTimeInNanos.inc(readTimeInNanos); } diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index ceb3f0985cbec..4d044a301c495 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -1654,13 +1654,14 @@ protected void doRun() { @Override public void onResponse(List> actionListeners) { completeListenersIgnoringException(actionListeners, snapshotInfo); - final Map attributes = SnapshotMetrics.createAttributesMap( - snapshot.getProjectId(), - repo.getMetadata() + final Map attributesWithState = Maps.copyMapWithAddedEntry( + 
SnapshotMetrics.createAttributesMap(snapshot.getProjectId(), repo.getMetadata()), + "state", + snapshotInfo.state().name() ); - snapshotMetrics.snapshotsCompletedCounter().incrementBy(1, attributes); + snapshotMetrics.snapshotsCompletedCounter().incrementBy(1, attributesWithState); snapshotMetrics.snapshotsDurationHistogram() - .record((snapshotInfo.endTime() - snapshotInfo.startTime()) / 1_000.0, attributes); + .record((snapshotInfo.endTime() - snapshotInfo.startTime()) / 1_000.0, attributesWithState); logger.info("snapshot [{}] completed with state [{}]", snapshot, snapshotInfo.state()); } From 85a98fc3192818947dd643630de8b719340238e1 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 7 Jul 2025 11:00:09 +0930 Subject: [PATCH 30/65] Remove redundant snapshot prefix in metric fields --- .../repositories/SnapshotMetrics.java | 18 +++++++++--------- .../blobstore/BlobStoreSnapshotMetrics.java | 18 +++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java index c61222c08bc99..9a89c7aedd7cd 100644 --- a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java @@ -25,15 +25,15 @@ public record SnapshotMetrics( LongCounter snapshotsStartedCounter, LongCounter snapshotsCompletedCounter, DoubleHistogram snapshotsDurationHistogram, - LongCounter snapshotsShardsStartedCounter, - LongCounter snapshotsShardsCompletedCounter, - DoubleHistogram snapshotShardsDurationHistogram, - LongCounter snapshotBlobsUploadedCounter, - LongCounter snapshotBytesUploadedCounter, - LongCounter snapshotUploadDurationCounter, - LongCounter snapshotUploadReadDurationCounter, - LongCounter snapshotCreateThrottleDurationCounter, - LongCounter snapshotRestoreThrottleDurationCounter, + LongCounter shardsStartedCounter, + LongCounter 
shardsCompletedCounter, + DoubleHistogram shardsDurationHistogram, + LongCounter blobsUploadedCounter, + LongCounter bytesUploadedCounter, + LongCounter uploadDurationCounter, + LongCounter uploadReadDurationCounter, + LongCounter createThrottleDurationCounter, + LongCounter restoreThrottleDurationCounter, MeterRegistry meterRegistry ) { diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java index bb11580b67490..a00ce3e046feb 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java @@ -41,7 +41,7 @@ public BlobStoreSnapshotMetrics(@Nullable ProjectId projectId, RepositoryMetadat } public void incrementSnapshotRateLimitingTimeInNanos(long throttleTimeNanos) { - snapshotMetrics.snapshotCreateThrottleDurationCounter().incrementBy(throttleTimeNanos, metricAttributes); + snapshotMetrics.createThrottleDurationCounter().incrementBy(throttleTimeNanos, metricAttributes); snapshotRateLimitingTimeInNanos.inc(throttleTimeNanos); } @@ -50,7 +50,7 @@ public long snapshotRateLimitingTimeInNanos() { } public void incrementRestoreRateLimitingTimeInNanos(long throttleTimeNanos) { - snapshotMetrics.snapshotRestoreThrottleDurationCounter().incrementBy(throttleTimeNanos, metricAttributes); + snapshotMetrics.restoreThrottleDurationCounter().incrementBy(throttleTimeNanos, metricAttributes); restoreRateLimitingTimeInNanos.inc(throttleTimeNanos); } @@ -59,33 +59,33 @@ public long restoreRateLimitingTimeInNanos() { } public void incrementCountersForPartUpload(long partSizeInBytes, long partWriteTimeNanos) { - snapshotMetrics.snapshotBytesUploadedCounter().incrementBy(partSizeInBytes, metricAttributes); - snapshotMetrics.snapshotUploadDurationCounter().incrementBy(partWriteTimeNanos, metricAttributes); + 
snapshotMetrics.bytesUploadedCounter().incrementBy(partSizeInBytes, metricAttributes); + snapshotMetrics.uploadDurationCounter().incrementBy(partWriteTimeNanos, metricAttributes); numberOfBytesUploaded.inc(partSizeInBytes); uploadTimeInNanos.inc(partWriteTimeNanos); } public void incrementNumberOfBlobsUploaded() { - snapshotMetrics.snapshotBlobsUploadedCounter().incrementBy(1, metricAttributes); + snapshotMetrics.blobsUploadedCounter().incrementBy(1, metricAttributes); numberOfBlobsUploaded.inc(); } public void shardSnapshotStarted() { - snapshotMetrics.snapshotsShardsStartedCounter().incrementBy(1, metricAttributes); + snapshotMetrics.shardsStartedCounter().incrementBy(1, metricAttributes); numberOfShardSnapshotsStarted.inc(); shardSnapshotsInProgress.inc(); } public void shardSnapshotCompleted(IndexShardSnapshotStatus status) { final Map attrsWithStage = Maps.copyMapWithAddedEntry(metricAttributes, "stage", status.getStage().name()); - snapshotMetrics.snapshotsShardsCompletedCounter().incrementBy(1, attrsWithStage); - snapshotMetrics.snapshotShardsDurationHistogram().record(status.getTotalTime() / 1_000f, attrsWithStage); + snapshotMetrics.shardsCompletedCounter().incrementBy(1, attrsWithStage); + snapshotMetrics.shardsDurationHistogram().record(status.getTotalTime() / 1_000f, attrsWithStage); numberOfShardSnapshotsCompleted.inc(); shardSnapshotsInProgress.dec(); } public void incrementUploadReadTime(long readTimeInNanos) { - snapshotMetrics.snapshotUploadReadDurationCounter().incrementBy(readTimeInNanos, metricAttributes); + snapshotMetrics.uploadReadDurationCounter().incrementBy(readTimeInNanos, metricAttributes); uploadReadTimeInNanos.inc(readTimeInNanos); } From ac2a2a34e9ae8e965009441b725bcc45a7082b37 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 7 Jul 2025 11:54:03 +0930 Subject: [PATCH 31/65] Don't track stats when project ID is null --- .../elasticsearch/repositories/SnapshotMetrics.java | 10 +++------- .../blobstore/BlobStoreSnapshotMetrics.java 
| 10 ++++++++-- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java index 9a89c7aedd7cd..323749bcc6570 100644 --- a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java @@ -11,7 +11,6 @@ import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.RepositoryMetadata; -import org.elasticsearch.core.Nullable; import org.elasticsearch.telemetry.metric.DoubleHistogram; import org.elasticsearch.telemetry.metric.LongCounter; import org.elasticsearch.telemetry.metric.LongWithAttributes; @@ -85,11 +84,8 @@ public void createSnapshotsInProgressMetric(Supplier createAttributesMap(@Nullable ProjectId projectId, RepositoryMetadata meta) { - if (projectId == null) { - return Map.of("repo_type", meta.type(), "repo_name", meta.name()); - } else { - return Map.of("project_id", projectId.id(), "repo_type", meta.type(), "repo_name", meta.name()); - } + public static Map createAttributesMap(ProjectId projectId, RepositoryMetadata meta) { + assert projectId != null : "Project ID should always be set"; + return Map.of("project_id", projectId.id(), "repo_type", meta.type(), "repo_name", meta.name()); } } diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java index a00ce3e046feb..860bef7c9ca76 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java @@ -36,8 +36,14 @@ public class BlobStoreSnapshotMetrics { private final Map metricAttributes; public BlobStoreSnapshotMetrics(@Nullable ProjectId projectId, 
RepositoryMetadata repositoryMetadata, SnapshotMetrics snapshotMetrics) { - this.snapshotMetrics = snapshotMetrics; - metricAttributes = SnapshotMetrics.createAttributesMap(projectId, repositoryMetadata); + if (projectId != null) { + this.snapshotMetrics = snapshotMetrics; + metricAttributes = SnapshotMetrics.createAttributesMap(projectId, repositoryMetadata); + } else { + // Project ID should only be null for the stateless main blobstore, which is not used for snapshots + this.snapshotMetrics = SnapshotMetrics.NOOP; + this.metricAttributes = Map.of(); + } } public void incrementSnapshotRateLimitingTimeInNanos(long throttleTimeNanos) { From c04e9ed1c1471b512cc2378b621bce83d48fed34 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 7 Jul 2025 12:32:56 +0930 Subject: [PATCH 32/65] Rename repositoryThrottlingStats -> repositorySnapshotStats --- .../snapshots/RepositorySnapshotStatsIT.java | 4 ++-- .../repositories/RepositoriesStats.java | 18 +++++++++--------- .../cluster/node/stats/NodeStatsTests.java | 6 +++--- .../repositories/RepositoriesServiceTests.java | 4 ++-- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java index 51a1d326e2881..bb43cbb674c42 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java @@ -76,8 +76,8 @@ public void testRepositorySnapshotStats() { RepositoriesStats stats = response.getNodes().get(0).getRepositoriesStats(); // These are just broad sanity checks on the values. 
There are more detailed checks in SnapshotMetricsIT - assertTrue(stats.getRepositoryThrottlingStats().containsKey("test-repo")); - RepositoriesStats.SnapshotStats snapshotStats = stats.getRepositoryThrottlingStats().get("test-repo"); + assertTrue(stats.getRepositorySnapshotStats().containsKey("test-repo")); + RepositoriesStats.SnapshotStats snapshotStats = stats.getRepositorySnapshotStats().get("test-repo"); assertThat(snapshotStats.totalWriteThrottledNanos(), greaterThan(0L)); assertThat(snapshotStats.totalReadThrottledNanos(), greaterThan(0L)); assertThat(snapshotStats.shardSnapshotsStarted(), equalTo((long) numberOfShards)); diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java index 2b05ed62e8a47..a4cffad1e8d6c 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java @@ -27,35 +27,35 @@ public class RepositoriesStats implements Writeable, ToXContentFragment { - private final Map repositoryThrottlingStats; + private final Map repositorySnapshotStats; public RepositoriesStats(StreamInput in) throws IOException { if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_9_X)) { - repositoryThrottlingStats = in.readMap(SnapshotStats::readFrom); + repositorySnapshotStats = in.readMap(SnapshotStats::readFrom); } else { - repositoryThrottlingStats = new HashMap<>(); + repositorySnapshotStats = new HashMap<>(); } } - public RepositoriesStats(Map repositoryThrottlingStats) { - this.repositoryThrottlingStats = new HashMap<>(repositoryThrottlingStats); + public RepositoriesStats(Map repositorySnapshotStats) { + this.repositorySnapshotStats = new HashMap<>(repositorySnapshotStats); } @Override public void writeTo(StreamOutput out) throws IOException { if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_9_X)) { - 
out.writeMap(repositoryThrottlingStats, StreamOutput::writeWriteable); + out.writeMap(repositorySnapshotStats, StreamOutput::writeWriteable); } } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.field("repositories", repositoryThrottlingStats); + builder.field("repositories", repositorySnapshotStats); return builder; } - public Map getRepositoryThrottlingStats() { - return Collections.unmodifiableMap(repositoryThrottlingStats); + public Map getRepositorySnapshotStats() { + return Collections.unmodifiableMap(repositorySnapshotStats); } public record SnapshotStats( diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java index 86ec8bdab7c20..34f9395fd415b 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -469,9 +469,9 @@ public void testSerialization() throws IOException { } RepositoriesStats repoThrottlingStats = deserializedNodeStats.getRepositoriesStats(); - assertTrue(repoThrottlingStats.getRepositoryThrottlingStats().containsKey("test-repository")); - assertEquals(100, repoThrottlingStats.getRepositoryThrottlingStats().get("test-repository").totalReadThrottledNanos()); - assertEquals(200, repoThrottlingStats.getRepositoryThrottlingStats().get("test-repository").totalWriteThrottledNanos()); + assertTrue(repoThrottlingStats.getRepositorySnapshotStats().containsKey("test-repository")); + assertEquals(100, repoThrottlingStats.getRepositorySnapshotStats().get("test-repository").totalReadThrottledNanos()); + assertEquals(200, repoThrottlingStats.getRepositorySnapshotStats().get("test-repository").totalWriteThrottledNanos()); } } diff --git 
a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java index 852fc19ebdea2..fec5869afd487 100644 --- a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java @@ -378,8 +378,8 @@ public void testRepositoriesThrottlingStats() { var clusterState = createClusterStateWithRepo(repoName, TestRepository.TYPE); repositoriesService.applyClusterState(new ClusterChangedEvent("put test repository", clusterState, emptyState())); RepositoriesStats throttlingStats = repositoriesService.getRepositoriesThrottlingStats(); - assertTrue(throttlingStats.getRepositoryThrottlingStats().containsKey(repoName)); - assertNotNull(throttlingStats.getRepositoryThrottlingStats().get(repoName)); + assertTrue(throttlingStats.getRepositorySnapshotStats().containsKey(repoName)); + assertNotNull(throttlingStats.getRepositorySnapshotStats().get(repoName)); } // InvalidRepository is created when current node is non-master node and failed to create repository by applying cluster state from From 94b2df502950832b4e0d0b54b750ff9961efaddc Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 7 Jul 2025 13:08:19 +0930 Subject: [PATCH 33/65] Only the current master reports snapshots in progress --- .../org/elasticsearch/snapshots/SnapshotsService.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 4d044a301c495..469c70251aaa5 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -4457,9 +4457,14 @@ private SnapshotsInProgress createSnapshot( } private Collection getSnapshotsInProgress() { - final 
SnapshotsInProgress snapshotsInProgress = SnapshotsInProgress.get(clusterService.state()); + final ClusterState currentState = clusterService.state(); + // Only the master should report on snapshots-in-progress + if (currentState.nodes().isLocalNodeElectedMaster() == false) { + return List.of(); + } + final SnapshotsInProgress snapshotsInProgress = SnapshotsInProgress.get(currentState); final List snapshotsInProgressMetrics = new ArrayList<>(); - clusterService.state().metadata().projects().forEach((projectId, project) -> { + currentState.metadata().projects().forEach((projectId, project) -> { RepositoriesMetadata repositoriesMetadata = RepositoriesMetadata.get(project); if (repositoriesMetadata != null) { repositoriesMetadata.repositories().forEach(repository -> { From 44510fa8e1e56b2602be327af2283cecfefb31b1 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 7 Jul 2025 14:49:02 +0930 Subject: [PATCH 34/65] Increment uploaded blobs metric before verify --- .../repositories/blobstore/BlobStoreRepository.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index ff1bcae629d5d..29ba11e4dd19f 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -4174,8 +4174,8 @@ private void checkAborted() { TimeUnit.NANOSECONDS.toMillis(uploadTimeInNanos) ); } - Store.verify(indexInput); blobStoreSnapshotMetrics.incrementNumberOfBlobsUploaded(); + Store.verify(indexInput); snapshotStatus.addProcessedFile(fileInfo.length()); } catch (Exception t) { failStoreIfCorrupted(store, t); From efbc43daf964bf7a2ad4b987b03d447ee7eb2ea0 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Mon, 7 Jul 2025 16:13:38 +0930 Subject: [PATCH 35/65] Add counts for shards 
in movement blocking states --- .../repositories/SnapshotMetrics.java | 10 ++++ .../snapshots/SnapshotsService.java | 53 ++++++++++++++++++- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java index 323749bcc6570..823c90becc75d 100644 --- a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java @@ -45,6 +45,7 @@ public record SnapshotMetrics( public static final String SNAPSHOT_SHARDS_STARTED = "es.repositories.snapshots.shards.started.total"; public static final String SNAPSHOT_SHARDS_COMPLETED = "es.repositories.snapshots.shards.completed.total"; public static final String SNAPSHOT_SHARDS_IN_PROGRESS = "es.repositories.snapshots.shards.current"; + public static final String SNAPSHOT_SHARDS_BY_STATUS = "es.repositories.snapshots.shards.by_status.current"; public static final String SNAPSHOT_SHARDS_DURATION = "es.repositories.snapshots.shards.duration.histogram"; public static final String SNAPSHOT_BLOBS_UPLOADED = "es.repositories.snapshots.blobs.uploaded.total"; public static final String SNAPSHOT_BYTES_UPLOADED = "es.repositories.snapshots.upload.bytes.total"; @@ -84,6 +85,15 @@ public void createSnapshotsInProgressMetric(Supplier> shardSnapshotsByStatusObserver) { + meterRegistry.registerLongsGauge( + SNAPSHOT_SHARDS_BY_STATUS, + "snapshotting shards by (potentially movement-blocking) status", + "unit", + shardSnapshotsByStatusObserver + ); + } + public static Map createAttributesMap(ProjectId projectId, RepositoryMetadata meta) { assert projectId != null : "Project ID should always be set"; return Map.of("project_id", projectId.id(), "repo_type", meta.type(), "repo_name", meta.name()); diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java 
b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 469c70251aaa5..48bcf53e194f7 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -9,6 +9,10 @@ package org.elasticsearch.snapshots; +import com.carrotsearch.hppc.ObjectIntHashMap; +import com.carrotsearch.hppc.ObjectIntMap; +import com.carrotsearch.hppc.cursors.ObjectIntCursor; + import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -58,6 +62,7 @@ import org.elasticsearch.cluster.routing.RerouteService; import org.elasticsearch.cluster.routing.RoutingTable; import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.allocation.decider.SnapshotInProgressAllocationDecider; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.cluster.service.MasterService; import org.elasticsearch.cluster.service.MasterServiceTaskQueue; @@ -162,6 +167,18 @@ public final class SnapshotsService extends AbstractLifecycleComponent implement private static final Logger logger = LogManager.getLogger(SnapshotsService.class); + /** + * We publish metrics of how many shards are in each of the following states + * these should be the list of status that potentially block movement in + * {@link SnapshotInProgressAllocationDecider} + */ + private static final List TRACKED_SHARD_STATES = List.of( + ShardState.INIT, + ShardState.PAUSED_FOR_NODE_REMOVAL, + ShardState.WAITING, + ShardState.QUEUED + ); + public static final String UPDATE_SNAPSHOT_STATUS_ACTION_NAME = "internal:cluster/snapshot/update_snapshot_status"; public static final String NO_FEATURE_STATES_VALUE = "none"; @@ -243,6 +260,7 @@ public SnapshotsService( this.transportService = transportService; this.snapshotMetrics = snapshotMetrics; 
snapshotMetrics.createSnapshotsInProgressMetric(this::getSnapshotsInProgress); + snapshotMetrics.createSnapshotShardsByStatusMetric(this::getShardsByState); // The constructor of UpdateSnapshotStatusAction will register itself to the TransportService. this.updateSnapshotStatusHandler = new UpdateSnapshotStatusAction(transportService, clusterService, threadPool, actionFilters); @@ -4465,7 +4483,7 @@ private Collection getSnapshotsInProgress() { final SnapshotsInProgress snapshotsInProgress = SnapshotsInProgress.get(currentState); final List snapshotsInProgressMetrics = new ArrayList<>(); currentState.metadata().projects().forEach((projectId, project) -> { - RepositoriesMetadata repositoriesMetadata = RepositoriesMetadata.get(project); + final RepositoriesMetadata repositoriesMetadata = RepositoriesMetadata.get(project); if (repositoriesMetadata != null) { repositoriesMetadata.repositories().forEach(repository -> { int snapshotCount = snapshotsInProgress.forRepo(projectId, repository.name()).size(); @@ -4478,6 +4496,39 @@ private Collection getSnapshotsInProgress() { return snapshotsInProgressMetrics; } + private Collection getShardsByState() { + final ClusterState currentState = clusterService.state(); + // Only the master should report on shards-by-state + if (currentState.nodes().isLocalNodeElectedMaster() == false) { + return List.of(); + } + final SnapshotsInProgress snapshotsInProgress = SnapshotsInProgress.get(currentState); + final List shardsByState = new ArrayList<>(); + final ObjectIntMap shardCounts = new ObjectIntHashMap<>(TRACKED_SHARD_STATES.size()); + currentState.metadata().projects().forEach((projectId, project) -> { + final RepositoriesMetadata repositoriesMetadata = RepositoriesMetadata.get(project); + if (repositoriesMetadata != null) { + for (RepositoryMetadata repository : repositoriesMetadata.repositories()) { + TRACKED_SHARD_STATES.forEach(shardState -> shardCounts.put(shardState, 0)); + for (SnapshotsInProgress.Entry snapshot : 
snapshotsInProgress.forRepo(projectId, repository.name())) { + for (ShardSnapshotStatus shardSnapshotStatus : snapshot.shards().values()) { + if (shardCounts.containsKey(shardSnapshotStatus.state())) { + shardCounts.put(shardSnapshotStatus.state(), shardCounts.get(shardSnapshotStatus.state()) + 1); + } + } + } + final Map attributesMap = SnapshotMetrics.createAttributesMap(projectId, repository); + for (ObjectIntCursor entry : shardCounts) { + shardsByState.add( + new LongWithAttributes(entry.value, Maps.copyMapWithAddedEntry(attributesMap, "state", entry.key.name())) + ); + } + } + } + }); + return shardsByState; + } + private record UpdateNodeIdsForRemovalTask() implements ClusterStateTaskListener { @Override public void onFailure(Exception e) { From 63b09dee7c2bd394aa00084407b42b301fcf488b Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 13:08:07 +0930 Subject: [PATCH 36/65] Add tests for shards by state counts --- .../repositories/SnapshotMetricsIT.java | 142 +++++++++++++++++- .../snapshots/SnapshotShutdownIT.java | 134 ----------------- .../AbstractSnapshotIntegTestCase.java | 137 +++++++++++++++++ 3 files changed, 275 insertions(+), 138 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index e643c02551cb3..e763d72c4f84c 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -12,7 +12,10 @@ import org.elasticsearch.action.admin.indices.stats.IndexStats; import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; import org.elasticsearch.action.admin.indices.stats.ShardStats; +import org.elasticsearch.cluster.SnapshotsInProgress; +import org.elasticsearch.cluster.metadata.IndexMetadata; import 
org.elasticsearch.cluster.metadata.ProjectId; +import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.CollectionUtils; @@ -32,6 +35,7 @@ import org.hamcrest.Matcher; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Stream; @@ -74,8 +78,8 @@ public void testSnapshotAPMMetrics() throws Exception { indexRandom(true, indexName, randomIntBetween(100, 300)); - IndicesStatsResponse indicesStats = indicesAdmin().prepareStats(indexName).get(); - IndexStats indexStats = indicesStats.getIndex(indexName); + final IndicesStatsResponse indicesStats = indicesAdmin().prepareStats(indexName).get(); + final IndexStats indexStats = indicesStats.getIndex(indexName); long totalSizeInBytes = 0; for (ShardStats shard : indexStats.getShards()) { totalSizeInBytes += shard.getStats().getStore().sizeInBytes(); @@ -85,8 +89,7 @@ public void testSnapshotAPMMetrics() throws Exception { final String repositoryName = randomIdentifier(); // we want to ensure some throttling, but not so much that it makes the test excessively slow. - // 3 seemed a reasonable multiple to ensure that. 
- final int shardSizeMultipleToEnsureThrottling = 3; + final int shardSizeMultipleToEnsureThrottling = 2; createRepository( repositoryName, "mock", @@ -218,6 +221,137 @@ public void testSnapshotAPMMetrics() throws Exception { assertMetricsHaveAttributes(InstrumentType.LONG_COUNTER, SnapshotMetrics.SNAPSHOT_BLOBS_UPLOADED, expectedAttrs); } + public void testShardsByStateCounts_InitAndQueued() throws Exception { + final String indexName = randomIdentifier(); + final int numShards = randomIntBetween(2, 10); + final int numReplicas = randomIntBetween(0, 1); + createIndex(indexName, numShards, numReplicas); + + indexRandom(true, indexName, randomIntBetween(100, 300)); + + final String repositoryName = randomIdentifier(); + createRepository(repositoryName, "mock"); + // Block the snapshot to test "snapshot shards in progress" + blockAllDataNodes(repositoryName); + + final String snapshotName = randomIdentifier(); + try { + clusterAdmin().prepareCreateSnapshot(TEST_REQUEST_TIMEOUT, repositoryName, snapshotName) + .setIndices(indexName) + .setWaitForCompletion(false) + .get(); + + waitForBlockOnAnyDataNode(repositoryName); + + // Should be {numShards} in INIT state + Map shardStates = getShardStates(); + assertThat(shardStates.get(SnapshotsInProgress.ShardState.INIT), equalTo((long) numShards)); + + // Queue up another snapshot + clusterAdmin().prepareCreateSnapshot(TEST_REQUEST_TIMEOUT, repositoryName, randomIdentifier()) + .setIndices(indexName) + .setWaitForCompletion(false) + .get(); + + // Should be {numShards} in QUEUED state + shardStates = getShardStates(); + assertThat(shardStates.get(SnapshotsInProgress.ShardState.QUEUED), equalTo((long) numShards)); + } finally { + unblockAllDataNodes(repositoryName); + } + + // All statuses should return to zero when the snapshots complete + awaitNumberOfSnapshotsInProgress(0); + getShardStates().forEach((key, value) -> assertThat(value, equalTo(0L))); + } + + public void testShardsByStateCounts_PausedForRemoval() throws 
Exception { + final String indexName = randomIdentifier(); + final int numShards = randomIntBetween(2, 10); + final int numReplicas = randomIntBetween(0, 1); + + final String nodeForRemoval = internalCluster().startDataOnlyNode(); + + createIndex( + indexName, + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numReplicas) + .put(IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_PREFIX + "._name", nodeForRemoval) + .build() + ); + indexRandom(true, indexName, randomIntBetween(1000, 3000)); + + final String repositoryName = randomIdentifier(); + createRepository(repositoryName, "mock"); + + // block the node to be removed + blockNodeOnAnyFiles(repositoryName, nodeForRemoval); + + final ClusterService clusterService = internalCluster().getCurrentMasterNodeInstance(ClusterService.class); + try { + // Kick off a snapshot + clusterAdmin().prepareCreateSnapshot(TEST_REQUEST_TIMEOUT, repositoryName, randomIdentifier()) + .setIndices(indexName) + .setWaitForCompletion(false) + .get(); + + // Wait till we're blocked + waitForBlock(nodeForRemoval, repositoryName); + + // Put shutdown metadata + putShutdownForRemovalMetadata(nodeForRemoval, clusterService); + } finally { + unblockAllDataNodes(repositoryName); + } + + // Wait for snapshot to be paused + safeAwait(createSnapshotPausedListener(clusterService, repositoryName, indexName, numShards)); + + final Map shardStates = getShardStates(); + assertThat(shardStates.get(SnapshotsInProgress.ShardState.PAUSED_FOR_NODE_REMOVAL), equalTo((long) numShards)); + + // clear shutdown metadata to allow snapshot to complete + clearShutdownMetadata(clusterService); + + // All statuses should return to zero when the snapshots complete + awaitNumberOfSnapshotsInProgress(0); + getShardStates().forEach((key, value) -> assertThat(value, equalTo(0L))); + } + + private Map getShardStates() { + collectMetrics(); + + return allTestTelemetryPlugins().map(testTelemetryPlugin -> { + 
final List longGaugeMeasurement = testTelemetryPlugin.getLongGaugeMeasurement( + SnapshotMetrics.SNAPSHOT_SHARDS_BY_STATUS + ); + final Map shardStates = new HashMap<>(); + // last one in wins + for (Measurement measurement : longGaugeMeasurement) { + shardStates.put( + SnapshotsInProgress.ShardState.valueOf(measurement.attributes().get("state").toString()), + measurement.getLong() + ); + } + return shardStates; + }).reduce(Map.of(), this::combineCounts); + } + + private Map combineCounts( + Map lhs, + Map rhs + ) { + final Map result = new HashMap<>(); + Stream.of(lhs, rhs) + .forEach( + countMap -> countMap.forEach( + (status, count) -> result.compute(status, (state, current) -> current == null ? count : current + count) + ) + ); + return result; + } + private static void assertMetricsHaveAttributes( InstrumentType instrumentType, String metricName, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotShutdownIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotShutdownIT.java index c1d9977993b49..c57fc25635b6c 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotShutdownIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotShutdownIT.java @@ -22,12 +22,9 @@ import org.elasticsearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse; import org.elasticsearch.action.support.ActionTestUtils; import org.elasticsearch.action.support.SubscribableListener; -import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.ClusterStateUpdateTask; import org.elasticsearch.cluster.SnapshotDeletionsInProgress; import org.elasticsearch.cluster.SnapshotsInProgress; import org.elasticsearch.cluster.metadata.IndexMetadata; -import org.elasticsearch.cluster.metadata.NodesShutdownMetadata; import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; import org.elasticsearch.cluster.routing.ShardRoutingState; import 
org.elasticsearch.cluster.service.ClusterService; @@ -46,12 +43,10 @@ import org.elasticsearch.test.transport.MockTransportService; import java.util.Collection; -import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.CyclicBarrier; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; -import java.util.stream.Stream; import static org.elasticsearch.snapshots.SnapshotShutdownProgressTracker.SNAPSHOT_PROGRESS_DURING_SHUTDOWN_LOG_INTERVAL_SETTING; import static org.hamcrest.Matchers.containsString; @@ -659,41 +654,6 @@ && switch (shardEntry.getValue().state()) { resetMockLog(); } - private static SubscribableListener createSnapshotPausedListener( - ClusterService clusterService, - String repoName, - String indexName, - int numShards - ) { - return ClusterServiceUtils.addTemporaryStateListener(clusterService, state -> { - final var entriesForRepo = SnapshotsInProgress.get(state).forRepo(repoName); - if (entriesForRepo.isEmpty()) { - // it's (just about) possible for the data node to apply the initial snapshot state, start on the first shard snapshot, and - // hit the IO block, before the master even applies this cluster state, in which case we simply retry: - return false; - } - assertThat(entriesForRepo, hasSize(1)); - final var shardSnapshotStatuses = entriesForRepo.iterator() - .next() - .shards() - .entrySet() - .stream() - .flatMap(e -> e.getKey().getIndexName().equals(indexName) ? 
Stream.of(e.getValue()) : Stream.of()) - .toList(); - assertThat(shardSnapshotStatuses, hasSize(numShards)); - for (var shardStatus : shardSnapshotStatuses) { - assertThat( - shardStatus.state(), - oneOf(SnapshotsInProgress.ShardState.INIT, SnapshotsInProgress.ShardState.PAUSED_FOR_NODE_REMOVAL) - ); - if (shardStatus.state() == SnapshotsInProgress.ShardState.INIT) { - return false; - } - } - return true; - }); - } - private static void addUnassignedShardsWatcher(ClusterService clusterService, String indexName) { ClusterServiceUtils.addTemporaryStateListener(clusterService, state -> { final var indexRoutingTable = state.routingTable().index(indexName); @@ -705,98 +665,4 @@ private static void addUnassignedShardsWatcher(ClusterService clusterService, St return false; }); } - - private static void putShutdownForRemovalMetadata(String nodeName, ClusterService clusterService) { - safeAwait((ActionListener listener) -> putShutdownForRemovalMetadata(clusterService, nodeName, listener)); - } - - private static void flushMasterQueue(ClusterService clusterService, ActionListener listener) { - clusterService.submitUnbatchedStateUpdateTask("flush queue", new ClusterStateUpdateTask(Priority.LANGUID) { - @Override - public ClusterState execute(ClusterState currentState) { - return currentState; - } - - @Override - public void onFailure(Exception e) { - fail(e); - } - - @Override - public void clusterStateProcessed(ClusterState initialState, ClusterState newState) { - listener.onResponse(null); - } - }); - } - - private static void putShutdownForRemovalMetadata(ClusterService clusterService, String nodeName, ActionListener listener) { - // not testing REPLACE just because it requires us to specify the replacement node - final var shutdownType = randomFrom(SingleNodeShutdownMetadata.Type.REMOVE, SingleNodeShutdownMetadata.Type.SIGTERM); - final var shutdownMetadata = SingleNodeShutdownMetadata.builder() - .setType(shutdownType) - 
.setStartedAtMillis(clusterService.threadPool().absoluteTimeInMillis()) - .setReason("test"); - switch (shutdownType) { - case SIGTERM -> shutdownMetadata.setGracePeriod(TimeValue.timeValueSeconds(60)); - } - SubscribableListener - - .newForked(l -> putShutdownMetadata(clusterService, shutdownMetadata, nodeName, l)) - .andThen(l -> flushMasterQueue(clusterService, l)) - .addListener(listener); - } - - private static void putShutdownMetadata( - ClusterService clusterService, - SingleNodeShutdownMetadata.Builder shutdownMetadataBuilder, - String nodeName, - ActionListener listener - ) { - clusterService.submitUnbatchedStateUpdateTask("mark node for removal", new ClusterStateUpdateTask() { - @Override - public ClusterState execute(ClusterState currentState) { - final var node = currentState.nodes().resolveNode(nodeName); - return currentState.copyAndUpdateMetadata( - mdb -> mdb.putCustom( - NodesShutdownMetadata.TYPE, - new NodesShutdownMetadata( - Map.of( - node.getId(), - shutdownMetadataBuilder.setNodeId(node.getId()).setNodeEphemeralId(node.getEphemeralId()).build() - ) - ) - ) - ); - } - - @Override - public void onFailure(Exception e) { - fail(e); - } - - @Override - public void clusterStateProcessed(ClusterState initialState, ClusterState newState) { - listener.onResponse(null); - } - }); - } - - private static void clearShutdownMetadata(ClusterService clusterService) { - safeAwait(listener -> clusterService.submitUnbatchedStateUpdateTask("remove restart marker", new ClusterStateUpdateTask() { - @Override - public ClusterState execute(ClusterState currentState) { - return currentState.copyAndUpdateMetadata(mdb -> mdb.putCustom(NodesShutdownMetadata.TYPE, NodesShutdownMetadata.EMPTY)); - } - - @Override - public void onFailure(Exception e) { - fail(e); - } - - @Override - public void clusterStateProcessed(ClusterState initialState, ClusterState newState) { - listener.onResponse(null); - } - })); - } } diff --git 
a/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java index ead163eaf26a4..84d2230b2db7f 100644 --- a/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java @@ -17,17 +17,21 @@ import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.support.GroupedActionListener; import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateUpdateTask; import org.elasticsearch.cluster.SnapshotDeletionsInProgress; import org.elasticsearch.cluster.SnapshotsInProgress; +import org.elasticsearch.cluster.metadata.NodesShutdownMetadata; import org.elasticsearch.cluster.metadata.ProjectId; import org.elasticsearch.cluster.metadata.RepositoriesMetadata; import org.elasticsearch.cluster.metadata.RepositoryMetadata; +import org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata; import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.CheckedBiConsumer; +import org.elasticsearch.common.Priority; import org.elasticsearch.common.Strings; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.bytes.BytesReference; @@ -35,6 +39,7 @@ import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; import 
org.elasticsearch.plugins.Plugin; @@ -52,6 +57,7 @@ import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.snapshots.mockstore.MockRepository; +import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.threadpool.ThreadPool; @@ -80,6 +86,7 @@ import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; import java.util.function.Function; +import java.util.stream.Stream; import java.util.stream.StreamSupport; import static org.elasticsearch.repositories.blobstore.BlobStoreRepository.READONLY_SETTING_KEY; @@ -91,6 +98,7 @@ import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.oneOf; public abstract class AbstractSnapshotIntegTestCase extends ESIntegTestCase { @@ -701,6 +709,135 @@ protected List createNSnapshots(String repoName, int count) throws Excep return createNSnapshots(logger, repoName, count); } + protected static void putShutdownForRemovalMetadata(String nodeName, ClusterService clusterService) { + safeAwait((ActionListener listener) -> putShutdownForRemovalMetadata(clusterService, nodeName, listener)); + } + + protected static void flushMasterQueue(ClusterService clusterService, ActionListener listener) { + clusterService.submitUnbatchedStateUpdateTask("flush queue", new ClusterStateUpdateTask(Priority.LANGUID) { + @Override + public ClusterState execute(ClusterState currentState) { + return currentState; + } + + @Override + public void onFailure(Exception e) { + fail(e); + } + + @Override + public void clusterStateProcessed(ClusterState initialState, ClusterState newState) { + listener.onResponse(null); + } + }); + } + + protected static void putShutdownForRemovalMetadata(ClusterService clusterService, String nodeName, 
ActionListener listener) { + // not testing REPLACE just because it requires us to specify the replacement node + final var shutdownType = randomFrom(SingleNodeShutdownMetadata.Type.REMOVE, SingleNodeShutdownMetadata.Type.SIGTERM); + final var shutdownMetadata = SingleNodeShutdownMetadata.builder() + .setType(shutdownType) + .setStartedAtMillis(clusterService.threadPool().absoluteTimeInMillis()) + .setReason("test"); + switch (shutdownType) { + case SIGTERM -> shutdownMetadata.setGracePeriod(TimeValue.timeValueSeconds(60)); + } + SubscribableListener + + .newForked(l -> putShutdownMetadata(clusterService, shutdownMetadata, nodeName, l)) + .andThen(l -> flushMasterQueue(clusterService, l)) + .addListener(listener); + } + + protected static void putShutdownMetadata( + ClusterService clusterService, + SingleNodeShutdownMetadata.Builder shutdownMetadataBuilder, + String nodeName, + ActionListener listener + ) { + clusterService.submitUnbatchedStateUpdateTask("mark node for removal", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) { + final var node = currentState.nodes().resolveNode(nodeName); + return currentState.copyAndUpdateMetadata( + mdb -> mdb.putCustom( + NodesShutdownMetadata.TYPE, + new NodesShutdownMetadata( + Map.of( + node.getId(), + shutdownMetadataBuilder.setNodeId(node.getId()).setNodeEphemeralId(node.getEphemeralId()).build() + ) + ) + ) + ); + } + + @Override + public void onFailure(Exception e) { + fail(e); + } + + @Override + public void clusterStateProcessed(ClusterState initialState, ClusterState newState) { + listener.onResponse(null); + } + }); + } + + protected static void clearShutdownMetadata(ClusterService clusterService) { + safeAwait(listener -> clusterService.submitUnbatchedStateUpdateTask("remove restart marker", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) { + return currentState.copyAndUpdateMetadata(mdb -> 
mdb.putCustom(NodesShutdownMetadata.TYPE, NodesShutdownMetadata.EMPTY)); + } + + @Override + public void onFailure(Exception e) { + fail(e); + } + + @Override + public void clusterStateProcessed(ClusterState initialState, ClusterState newState) { + listener.onResponse(null); + } + })); + } + + protected static SubscribableListener createSnapshotPausedListener( + ClusterService clusterService, + String repoName, + String indexName, + int numShards + ) { + return ClusterServiceUtils.addTemporaryStateListener(clusterService, state -> { + final var entriesForRepo = SnapshotsInProgress.get(state).forRepo(repoName); + if (entriesForRepo.isEmpty()) { + // it's (just about) possible for the data node to apply the initial snapshot state, start on the first shard snapshot, and + // hit the IO block, before the master even applies this cluster state, in which case we simply retry: + return false; + } + assertThat(entriesForRepo, hasSize(1)); + final var shardSnapshotStatuses = entriesForRepo.iterator() + .next() + .shards() + .entrySet() + .stream() + .flatMap(e -> e.getKey().getIndexName().equals(indexName) ? 
Stream.of(e.getValue()) : Stream.of()) + .toList(); + assertThat(shardSnapshotStatuses, hasSize(numShards)); + for (var shardStatus : shardSnapshotStatuses) { + assertThat( + shardStatus.state(), + oneOf(SnapshotsInProgress.ShardState.INIT, SnapshotsInProgress.ShardState.PAUSED_FOR_NODE_REMOVAL) + ); + if (shardStatus.state() == SnapshotsInProgress.ShardState.INIT) { + return false; + } + } + return true; + }); + } + public static List createNSnapshots(Logger logger, String repoName, int count) throws Exception { final PlainActionFuture> allSnapshotsDone = new PlainActionFuture<>(); final ActionListener snapshotsListener = new GroupedActionListener<>(count, allSnapshotsDone); From ca0a3c91f08522ba59a3092d899c9f6b16d8a621 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 15:44:46 +0930 Subject: [PATCH 37/65] Add test for WAITING state --- .../repositories/SnapshotMetricsIT.java | 77 ++++++++++++++++++- .../AbstractSnapshotIntegTestCase.java | 25 ++++-- 2 files changed, 92 insertions(+), 10 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index e763d72c4f84c..481d217b73e66 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -18,10 +18,10 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.common.util.Maps; import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus; +import org.elasticsearch.indices.recovery.PeerRecoveryTargetService; import org.elasticsearch.plugins.Plugin; import 
org.elasticsearch.plugins.PluginsService; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; @@ -31,13 +31,16 @@ import org.elasticsearch.telemetry.RecordingMeterRegistry; import org.elasticsearch.telemetry.TestTelemetryPlugin; import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.transport.MockTransportService; import org.elasticsearch.threadpool.ThreadPool; import org.hamcrest.Matcher; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.concurrent.CyclicBarrier; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -56,9 +59,14 @@ @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST) public class SnapshotMetricsIT extends AbstractSnapshotIntegTestCase { + private static final String REQUIRE_NODE_NAME_SETTING = IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_PREFIX + "._name"; + @Override protected Collection> nodePlugins() { - return CollectionUtils.appendToCopy(super.nodePlugins(), TestTelemetryPlugin.class); + var plugins = new HashSet<>(super.nodePlugins()); + plugins.add(TestTelemetryPlugin.class); + plugins.add(MockTransportService.TestPlugin.class); + return plugins; } @Override @@ -277,7 +285,7 @@ public void testShardsByStateCounts_PausedForRemoval() throws Exception { Settings.builder() .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards) .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numReplicas) - .put(IndexMetadata.INDEX_ROUTING_REQUIRE_GROUP_PREFIX + "._name", nodeForRemoval) + .put(REQUIRE_NODE_NAME_SETTING, nodeForRemoval) .build() ); indexRandom(true, indexName, randomIntBetween(1000, 3000)); @@ -314,7 +322,68 @@ public void testShardsByStateCounts_PausedForRemoval() throws Exception { // clear shutdown metadata to allow snapshot to complete clearShutdownMetadata(clusterService); - // All statuses should return to zero when the snapshots complete + // All statuses should return to zero 
when the snapshot completes + awaitNumberOfSnapshotsInProgress(0); + getShardStates().forEach((key, value) -> assertThat(value, equalTo(0L))); + } + + public void testShardsByStateCounts_Waiting() throws Exception { + final String indexName = randomIdentifier(); + final String boundNode = internalCluster().startDataOnlyNode(); + final String destinationNode = internalCluster().startDataOnlyNode(); + + // Create with single shard so we can reliably delay relocation + createIndex( + indexName, + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(REQUIRE_NODE_NAME_SETTING, boundNode) + .build() + ); + indexRandom(true, indexName, randomIntBetween(100, 300)); + + final String repositoryName = randomIdentifier(); + createRepository(repositoryName, "mock"); + + final MockTransportService transportService = MockTransportService.getInstance(destinationNode); + final CyclicBarrier handoffRequestBarrier = new CyclicBarrier(2); + transportService.addRequestHandlingBehavior( + PeerRecoveryTargetService.Actions.HANDOFF_PRIMARY_CONTEXT, + (handler, request, channel, task) -> { + safeAwait(handoffRequestBarrier); + safeAwait(handoffRequestBarrier); + handler.messageReceived(request, channel, task); + } + ); + + // Force the index to move to another node + client().admin() + .indices() + .prepareUpdateSettings(indexName) + .setSettings(Settings.builder().put(REQUIRE_NODE_NAME_SETTING, destinationNode).build()) + .get(); + + // Wait for hand-off request to be blocked (the shard should be relocating now) + safeAwait(handoffRequestBarrier); + + // Kick off a snapshot + clusterAdmin().prepareCreateSnapshot(TEST_REQUEST_TIMEOUT, repositoryName, randomIdentifier()) + .setIndices(indexName) + .setWaitForCompletion(false) + .get(); + + // Wait till we see a shard in WAITING state + createSnapshotInStateListener(clusterService(), repositoryName, indexName, 1, SnapshotsInProgress.ShardState.WAITING); + + // Metrics 
should have a shard in waiting state + final Map shardStates = getShardStates(); + assertThat(shardStates.get(SnapshotsInProgress.ShardState.WAITING), equalTo(1L)); + + // allow the relocation to complete + safeAwait(handoffRequestBarrier); + + // All statuses should return to zero when the snapshot completes awaitNumberOfSnapshotsInProgress(0); getShardStates().forEach((key, value) -> assertThat(value, equalTo(0L))); } diff --git a/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java index 84d2230b2db7f..5b912e3bb3527 100644 --- a/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/snapshots/AbstractSnapshotIntegTestCase.java @@ -803,11 +803,12 @@ public void clusterStateProcessed(ClusterState initialState, ClusterState newSta })); } - protected static SubscribableListener createSnapshotPausedListener( + protected static SubscribableListener createSnapshotInStateListener( ClusterService clusterService, String repoName, String indexName, - int numShards + int numShards, + SnapshotsInProgress.ShardState shardState ) { return ClusterServiceUtils.addTemporaryStateListener(clusterService, state -> { final var entriesForRepo = SnapshotsInProgress.get(state).forRepo(repoName); @@ -826,10 +827,7 @@ protected static SubscribableListener createSnapshotPausedListener( .toList(); assertThat(shardSnapshotStatuses, hasSize(numShards)); for (var shardStatus : shardSnapshotStatuses) { - assertThat( - shardStatus.state(), - oneOf(SnapshotsInProgress.ShardState.INIT, SnapshotsInProgress.ShardState.PAUSED_FOR_NODE_REMOVAL) - ); + assertThat(shardStatus.state(), oneOf(SnapshotsInProgress.ShardState.INIT, shardState)); if (shardStatus.state() == SnapshotsInProgress.ShardState.INIT) { return false; } @@ -838,6 +836,21 @@ protected static SubscribableListener 
createSnapshotPausedListener( }); } + protected static SubscribableListener createSnapshotPausedListener( + ClusterService clusterService, + String repoName, + String indexName, + int numShards + ) { + return createSnapshotInStateListener( + clusterService, + repoName, + indexName, + numShards, + SnapshotsInProgress.ShardState.PAUSED_FOR_NODE_REMOVAL + ); + } + public static List createNSnapshots(Logger logger, String repoName, int count) throws Exception { final PlainActionFuture> allSnapshotsDone = new PlainActionFuture<>(); final ActionListener snapshotsListener = new GroupedActionListener<>(count, allSnapshotsDone); From 9115b1872723272e446bf2a370ace71fcc8466c7 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 15:45:59 +0930 Subject: [PATCH 38/65] Refine description of tracked snapshot states --- .../java/org/elasticsearch/snapshots/SnapshotsService.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 48bcf53e194f7..a9f01767b9439 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -169,8 +169,9 @@ public final class SnapshotsService extends AbstractLifecycleComponent implement /** * We publish metrics of how many shards are in each of the following states - * these should be the list of status that potentially block movement in - * {@link SnapshotInProgressAllocationDecider} + * these should be the list of statuses that potentially block movement in + * {@link SnapshotInProgressAllocationDecider}, or states that might delay + * a snapshot's completion. 
*/ private static final List TRACKED_SHARD_STATES = List.of( ShardState.INIT, From cddf3b3ead70fd0532e82eb01a02d034da5a2f91 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 15:55:40 +0930 Subject: [PATCH 39/65] Tidy --- .../repositories/SnapshotMetricsIT.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index 481d217b73e66..54f9136e879c4 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -24,6 +24,7 @@ import org.elasticsearch.indices.recovery.PeerRecoveryTargetService; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.PluginsService; +import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.snapshots.AbstractSnapshotIntegTestCase; import org.elasticsearch.snapshots.SnapshotState; import org.elasticsearch.telemetry.InstrumentType; @@ -37,7 +38,6 @@ import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.concurrent.CyclicBarrier; @@ -63,10 +63,8 @@ public class SnapshotMetricsIT extends AbstractSnapshotIntegTestCase { @Override protected Collection> nodePlugins() { - var plugins = new HashSet<>(super.nodePlugins()); - plugins.add(TestTelemetryPlugin.class); - plugins.add(MockTransportService.TestPlugin.class); - return plugins; + return Stream.concat(super.nodePlugins().stream(), Stream.of(TestTelemetryPlugin.class, MockTransportService.TestPlugin.class)) + .toList(); } @Override @@ -102,9 +100,13 @@ public void testSnapshotAPMMetrics() throws Exception { repositoryName, "mock", randomRepositorySettings().put( - 
"max_snapshot_bytes_per_sec", + BlobStoreRepository.MAX_SNAPSHOT_BYTES_PER_SEC.getKey(), ByteSizeValue.ofBytes(totalSizeInBytes * shardSizeMultipleToEnsureThrottling) - ).put("max_restore_bytes_per_sec", ByteSizeValue.ofBytes(totalSizeInBytes * shardSizeMultipleToEnsureThrottling)) + ) + .put( + BlobStoreRepository.MAX_RESTORE_BYTES_PER_SEC.getKey(), + ByteSizeValue.ofBytes(totalSizeInBytes * shardSizeMultipleToEnsureThrottling) + ) ); // Block the snapshot to test "snapshot shards in progress" From 03e9d815faecbfce31dd97bedf396a9e58175f68 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 16:38:40 +0930 Subject: [PATCH 40/65] Use humanReadableField --- .../repositories/RepositoriesStats.java | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java index a4cffad1e8d6c..c1feb2710e406 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java @@ -97,12 +97,16 @@ public SnapshotStats(long totalReadThrottledNanos, long totalWriteThrottledNanos @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); - if (builder.humanReadable()) { - builder.field("total_read_throttled_time", new TimeValue(totalReadThrottledNanos, TimeUnit.NANOSECONDS)); - builder.field("total_write_throttled_time", new TimeValue(totalWriteThrottledNanos, TimeUnit.NANOSECONDS)); - } - builder.field("total_read_throttled_time_nanos", totalReadThrottledNanos); - builder.field("total_write_throttled_time_nanos", totalWriteThrottledNanos); + builder.humanReadableField( + "total_read_throttled_time_nanos", + "total_read_throttled_time", + new TimeValue(totalReadThrottledNanos, TimeUnit.NANOSECONDS) + ); + builder.humanReadableField( + 
"total_write_throttled_time_nanos", + "total_write_throttled_time", + new TimeValue(totalWriteThrottledNanos, TimeUnit.NANOSECONDS) + ); if (shardSnapshotsStarted != -1) { builder.field("shard_snapshots_started", shardSnapshotsStarted); } @@ -116,25 +120,21 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field("blobs_uploaded", numberOfBlobsUploaded); } if (numberOfBytesUploaded != -1) { - if (builder.humanReadable()) { - builder.field("bytes_uploaded", ByteSizeValue.ofBytes(numberOfBytesUploaded)); - } else { - builder.field("bytes_uploaded", numberOfBytesUploaded); - } + builder.humanReadableField("bytes_uploaded", "bytes_uploaded", ByteSizeValue.ofBytes(numberOfBytesUploaded)); } if (totalUploadTimeInNanos != -1) { - if (builder.humanReadable()) { - builder.field("total_upload_time", TimeValue.timeValueNanos(totalUploadTimeInNanos)); - } else { - builder.field("total_upload_time_in_nanos", totalUploadTimeInNanos); - } + builder.humanReadableField( + "total_upload_time_in_millis", + "total_upload_time", + TimeValue.timeValueNanos(totalUploadTimeInNanos) + ); } if (totalUploadReadTimeInNanos != -1) { - if (builder.humanReadable()) { - builder.field("total_read_time", TimeValue.timeValueNanos(totalUploadReadTimeInNanos)); - } else { - builder.field("total_read_time_in_nanos", totalUploadReadTimeInNanos); - } + builder.humanReadableField( + "total_read_time_in_millis", + "total_read_time", + TimeValue.timeValueNanos(totalUploadReadTimeInNanos) + ); } builder.endObject(); return builder; From c4a0d67f2a7a9525473a2d36daaef661ac43ff34 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 16:42:38 +0930 Subject: [PATCH 41/65] Better names for uploaded size/blobs --- .../org/elasticsearch/repositories/RepositoriesStats.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java 
b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java index c1feb2710e406..6386e7cb68277 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java @@ -117,10 +117,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field("shard_snapshots_in_progress", shardSnapshotsInProgress); } if (numberOfBlobsUploaded != -1) { - builder.field("blobs_uploaded", numberOfBlobsUploaded); + builder.field("uploaded_blobs", numberOfBlobsUploaded); } if (numberOfBytesUploaded != -1) { - builder.humanReadableField("bytes_uploaded", "bytes_uploaded", ByteSizeValue.ofBytes(numberOfBytesUploaded)); + builder.humanReadableField("uploaded_size_in_bytes", "uploaded_size", ByteSizeValue.ofBytes(numberOfBytesUploaded)); } if (totalUploadTimeInNanos != -1) { builder.humanReadableField( From 3f8762d33c6ef2b6abcaaea208d6cd4ecb79da29 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 17:35:18 +0930 Subject: [PATCH 42/65] Assert common attributes for shards-by-status metrics --- .../repositories/SnapshotMetricsIT.java | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index 54f9136e879c4..6ffa1296bb2f4 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -241,7 +241,7 @@ public void testShardsByStateCounts_InitAndQueued() throws Exception { final String repositoryName = randomIdentifier(); createRepository(repositoryName, "mock"); - // Block the snapshot to test "snapshot shards in progress" + // Block repo reads so we can queue snapshots 
blockAllDataNodes(repositoryName); final String snapshotName = randomIdentifier(); @@ -273,6 +273,13 @@ public void testShardsByStateCounts_InitAndQueued() throws Exception { // All statuses should return to zero when the snapshots complete awaitNumberOfSnapshotsInProgress(0); getShardStates().forEach((key, value) -> assertThat(value, equalTo(0L))); + + // Ensure all common attributes are present + assertMetricsHaveAttributes( + InstrumentType.LONG_GAUGE, + SnapshotMetrics.SNAPSHOT_SHARDS_BY_STATUS, + Map.of("project_id", ProjectId.DEFAULT.id(), "repo_name", repositoryName, "repo_type", "mock") + ); } public void testShardsByStateCounts_PausedForRemoval() throws Exception { @@ -327,6 +334,13 @@ public void testShardsByStateCounts_PausedForRemoval() throws Exception { // All statuses should return to zero when the snapshot completes awaitNumberOfSnapshotsInProgress(0); getShardStates().forEach((key, value) -> assertThat(value, equalTo(0L))); + + // Ensure all common attributes are present + assertMetricsHaveAttributes( + InstrumentType.LONG_GAUGE, + SnapshotMetrics.SNAPSHOT_SHARDS_BY_STATUS, + Map.of("project_id", ProjectId.DEFAULT.id(), "repo_name", repositoryName, "repo_type", "mock") + ); } public void testShardsByStateCounts_Waiting() throws Exception { @@ -388,6 +402,13 @@ public void testShardsByStateCounts_Waiting() throws Exception { // All statuses should return to zero when the snapshot completes awaitNumberOfSnapshotsInProgress(0); getShardStates().forEach((key, value) -> assertThat(value, equalTo(0L))); + + // Ensure all common attributes are present + assertMetricsHaveAttributes( + InstrumentType.LONG_GAUGE, + SnapshotMetrics.SNAPSHOT_SHARDS_BY_STATUS, + Map.of("project_id", ProjectId.DEFAULT.id(), "repo_name", repositoryName, "repo_type", "mock") + ); } private Map getShardStates() { From 46f892affeee199a38cd22559fb4dc7a9b03beea Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 17:43:33 +0930 Subject: [PATCH 43/65] Reduce number of 
documents indexed --- .../java/org/elasticsearch/repositories/SnapshotMetricsIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index 6ffa1296bb2f4..17d6e0e7c12e1 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -297,7 +297,7 @@ public void testShardsByStateCounts_PausedForRemoval() throws Exception { .put(REQUIRE_NODE_NAME_SETTING, nodeForRemoval) .build() ); - indexRandom(true, indexName, randomIntBetween(1000, 3000)); + indexRandom(true, indexName, randomIntBetween(100, 300)); final String repositoryName = randomIdentifier(); createRepository(repositoryName, "mock"); From 8bf2475da2fe9f2f483907d95ecca0b83a9d600b Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 18:08:30 +0930 Subject: [PATCH 44/65] Use millis rather than nanos when measuring/counting upload/read time --- .../repositories/SnapshotMetricsIT.java | 20 +++++++------------ .../snapshots/RepositorySnapshotStatsIT.java | 6 +++--- .../repositories/RepositoriesStats.java | 16 +++++++-------- .../repositories/SnapshotMetrics.java | 4 ++-- .../blobstore/BlobStoreRepository.java | 16 +++++++-------- .../blobstore/BlobStoreSnapshotMetrics.java | 16 +++++++-------- 6 files changed, 36 insertions(+), 42 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index 17d6e0e7c12e1..732d128655021 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ 
-133,7 +133,7 @@ public void testSnapshotAPMMetrics() throws Exception { // wait for snapshot to finish to test the other metrics awaitNumberOfSnapshotsInProgress(0); - final long snapshotElapsedTimeNanos = System.nanoTime() - beforeCreateSnapshotNanos; + final TimeValue snapshotElapsedTime = TimeValue.timeValueNanos(System.nanoTime() - beforeCreateSnapshotNanos); collectMetrics(); // sanity check blobs, bytes and throttling metrics @@ -147,17 +147,11 @@ public void testSnapshotAPMMetrics() throws Exception { // Sanity check shard duration observations assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_SHARDS_DURATION, hasSize(numShards)); - assertDoubleHistogramMetrics( - SnapshotMetrics.SNAPSHOT_SHARDS_DURATION, - everyItem(lessThan(TimeValue.timeValueNanos(snapshotElapsedTimeNanos).secondsFrac())) - ); + assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_SHARDS_DURATION, everyItem(lessThan(snapshotElapsedTime.secondsFrac()))); // Sanity check snapshot observations assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_DURATION, hasSize(1)); - assertDoubleHistogramMetrics( - SnapshotMetrics.SNAPSHOT_DURATION, - everyItem(lessThan(TimeValue.timeValueNanos(snapshotElapsedTimeNanos).secondsFrac())) - ); + assertDoubleHistogramMetrics(SnapshotMetrics.SNAPSHOT_DURATION, everyItem(lessThan(snapshotElapsedTime.secondsFrac()))); // Work out the maximum amount of concurrency per node final ThreadPool tp = internalCluster().getDataNodeInstance(ThreadPool.class); @@ -165,15 +159,15 @@ public void testSnapshotAPMMetrics() throws Exception { final int maximumPerNodeConcurrency = Math.max(snapshotThreadPoolSize, numShards); // sanity check duration values - final long upperBoundTimeSpentOnSnapshotThingsNanos = internalCluster().numDataNodes() * maximumPerNodeConcurrency - * snapshotElapsedTimeNanos; + final long upperBoundTimeSpentOnSnapshotThingsMillis = internalCluster().numDataNodes() * maximumPerNodeConcurrency + * snapshotElapsedTime.millis(); assertThat( 
getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_UPLOAD_DURATION), - allOf(greaterThan(0L), lessThan(upperBoundTimeSpentOnSnapshotThingsNanos)) + allOf(greaterThan(0L), lessThan(upperBoundTimeSpentOnSnapshotThingsMillis)) ); assertThat( getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_UPLOAD_READ_DURATION), - allOf(greaterThan(0L), lessThan(upperBoundTimeSpentOnSnapshotThingsNanos)) + allOf(greaterThan(0L), lessThan(upperBoundTimeSpentOnSnapshotThingsMillis)) ); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_SHARDS_STARTED), equalTo((long) numShards)); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java index bb43cbb674c42..1d2ad3835e5fe 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java @@ -85,8 +85,8 @@ public void testRepositorySnapshotStats() { assertThat(snapshotStats.shardSnapshotsInProgress(), equalTo(0L)); assertThat(snapshotStats.numberOfBlobsUploaded(), greaterThan(0L)); assertThat(snapshotStats.numberOfBytesUploaded(), greaterThan(0L)); - assertThat(snapshotStats.totalUploadTimeInNanos(), greaterThan(0L)); - assertThat(snapshotStats.totalUploadReadTimeInNanos(), greaterThan(0L)); - assertThat(snapshotStats.totalUploadReadTimeInNanos(), lessThan(snapshotStats.totalUploadTimeInNanos())); + assertThat(snapshotStats.totalUploadTimeInMillis(), greaterThan(0L)); + assertThat(snapshotStats.totalUploadReadTimeInMillis(), greaterThan(0L)); + assertThat(snapshotStats.totalUploadReadTimeInMillis(), lessThan(snapshotStats.totalUploadTimeInMillis())); } } diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java index 
6386e7cb68277..9ba61df71631a 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java @@ -66,8 +66,8 @@ public record SnapshotStats( long totalWriteThrottledNanos, long numberOfBlobsUploaded, long numberOfBytesUploaded, - long totalUploadTimeInNanos, - long totalUploadReadTimeInNanos + long totalUploadTimeInMillis, + long totalUploadReadTimeInMillis ) implements ToXContentObject, Writeable { public static SnapshotStats readFrom(StreamInput in) throws IOException { @@ -122,18 +122,18 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (numberOfBytesUploaded != -1) { builder.humanReadableField("uploaded_size_in_bytes", "uploaded_size", ByteSizeValue.ofBytes(numberOfBytesUploaded)); } - if (totalUploadTimeInNanos != -1) { + if (totalUploadTimeInMillis != -1) { builder.humanReadableField( "total_upload_time_in_millis", "total_upload_time", - TimeValue.timeValueNanos(totalUploadTimeInNanos) + TimeValue.timeValueMillis(totalUploadTimeInMillis) ); } - if (totalUploadReadTimeInNanos != -1) { + if (totalUploadReadTimeInMillis != -1) { builder.humanReadableField( "total_read_time_in_millis", "total_read_time", - TimeValue.timeValueNanos(totalUploadReadTimeInNanos) + TimeValue.timeValueMillis(totalUploadReadTimeInMillis) ); } builder.endObject(); @@ -150,8 +150,8 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(shardSnapshotsInProgress); out.writeLong(numberOfBlobsUploaded); out.writeLong(numberOfBytesUploaded); - out.writeLong(totalUploadTimeInNanos); - out.writeLong(totalUploadReadTimeInNanos); + out.writeLong(totalUploadTimeInMillis); + out.writeLong(totalUploadReadTimeInMillis); } } } diff --git a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java index 823c90becc75d..2b3b43e689c2b 100644 --- 
a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java @@ -64,8 +64,8 @@ public SnapshotMetrics(MeterRegistry meterRegistry) { meterRegistry.registerDoubleHistogram(SNAPSHOT_SHARDS_DURATION, "shard snapshots duration", "s"), meterRegistry.registerLongCounter(SNAPSHOT_BLOBS_UPLOADED, "snapshot blobs uploaded", "unit"), meterRegistry.registerLongCounter(SNAPSHOT_BYTES_UPLOADED, "snapshot bytes uploaded", "bytes"), - meterRegistry.registerLongCounter(SNAPSHOT_UPLOAD_DURATION, "snapshot upload duration", "ns"), - meterRegistry.registerLongCounter(SNAPSHOT_UPLOAD_READ_DURATION, "time spent in read() calls when snapshotting", "ns"), + meterRegistry.registerLongCounter(SNAPSHOT_UPLOAD_DURATION, "snapshot upload duration", "ms"), + meterRegistry.registerLongCounter(SNAPSHOT_UPLOAD_READ_DURATION, "time spent in read() calls when snapshotting", "ms"), meterRegistry.registerLongCounter(SNAPSHOT_CREATE_THROTTLE_DURATION, "time throttled in snapshot create", "bytes"), meterRegistry.registerLongCounter(SNAPSHOT_RESTORE_THROTTLE_DURATION, "time throttled in snapshot restore", "bytes"), meterRegistry diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 29ba11e4dd19f..b9fe812981559 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -4140,18 +4140,18 @@ protected void snapshotFile(SnapshotShardContext context, FileInfo fileInfo) thr @Override public int read() throws IOException { checkAborted(); - final long beforeReadNanos = System.nanoTime(); + final long beforeReadMillis = threadPool.rawRelativeTimeInMillis(); int value = super.read(); - 
blobStoreSnapshotMetrics.incrementUploadReadTime(System.nanoTime() - beforeReadNanos); + blobStoreSnapshotMetrics.incrementUploadReadTime(threadPool.rawRelativeTimeInMillis() - beforeReadMillis); return value; } @Override public int read(byte[] b, int off, int len) throws IOException { checkAborted(); - final long beforeReadNanos = System.nanoTime(); + final long beforeReadMillis = threadPool.rawRelativeTimeInMillis(); int amountRead = super.read(b, off, len); - blobStoreSnapshotMetrics.incrementUploadReadTime(System.nanoTime() - beforeReadNanos); + blobStoreSnapshotMetrics.incrementUploadReadTime(threadPool.rawRelativeTimeInMillis() - beforeReadMillis); return amountRead; } @@ -4161,17 +4161,17 @@ private void checkAborted() { }; final String partName = fileInfo.partName(i); logger.trace("[{}] Writing [{}] to [{}]", metadata.name(), partName, shardContainer.path()); - final long startNanos = System.nanoTime(); + final long startMillis = threadPool.relativeTimeInMillis(); shardContainer.writeBlob(OperationPurpose.SNAPSHOT_DATA, partName, inputStream, partBytes, false); - final long uploadTimeInNanos = System.nanoTime() - startNanos; - blobStoreSnapshotMetrics.incrementCountersForPartUpload(partBytes, uploadTimeInNanos); + final long uploadTimeInMillis = threadPool.relativeTimeInMillis() - startMillis; + blobStoreSnapshotMetrics.incrementCountersForPartUpload(partBytes, uploadTimeInMillis); logger.trace( "[{}] Writing [{}] of size [{}b] to [{}] took [{}ms]", metadata.name(), partName, partBytes, shardContainer.path(), - TimeUnit.NANOSECONDS.toMillis(uploadTimeInNanos) + uploadTimeInMillis ); } blobStoreSnapshotMetrics.incrementNumberOfBlobsUploaded(); diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java index 860bef7c9ca76..16b7007fa041c 100644 --- 
a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java @@ -29,7 +29,7 @@ public class BlobStoreSnapshotMetrics { private final CounterMetric restoreRateLimitingTimeInNanos = new CounterMetric(); private final CounterMetric numberOfBlobsUploaded = new CounterMetric(); private final CounterMetric numberOfBytesUploaded = new CounterMetric(); - private final CounterMetric uploadTimeInNanos = new CounterMetric(); + private final CounterMetric uploadTimeInMillis = new CounterMetric(); private final CounterMetric uploadReadTimeInNanos = new CounterMetric(); private final CounterMetric numberOfShardSnapshotsStarted = new CounterMetric(); private final CounterMetric numberOfShardSnapshotsCompleted = new CounterMetric(); @@ -64,11 +64,11 @@ public long restoreRateLimitingTimeInNanos() { return restoreRateLimitingTimeInNanos.count(); } - public void incrementCountersForPartUpload(long partSizeInBytes, long partWriteTimeNanos) { + public void incrementCountersForPartUpload(long partSizeInBytes, long partWriteTimeMillis) { snapshotMetrics.bytesUploadedCounter().incrementBy(partSizeInBytes, metricAttributes); - snapshotMetrics.uploadDurationCounter().incrementBy(partWriteTimeNanos, metricAttributes); + snapshotMetrics.uploadDurationCounter().incrementBy(partWriteTimeMillis, metricAttributes); numberOfBytesUploaded.inc(partSizeInBytes); - uploadTimeInNanos.inc(partWriteTimeNanos); + uploadTimeInMillis.inc(partWriteTimeMillis); } public void incrementNumberOfBlobsUploaded() { @@ -90,9 +90,9 @@ public void shardSnapshotCompleted(IndexShardSnapshotStatus status) { shardSnapshotsInProgress.dec(); } - public void incrementUploadReadTime(long readTimeInNanos) { - snapshotMetrics.uploadReadDurationCounter().incrementBy(readTimeInNanos, metricAttributes); - uploadReadTimeInNanos.inc(readTimeInNanos); + public void incrementUploadReadTime(long 
readTimeInMillis) { + snapshotMetrics.uploadReadDurationCounter().incrementBy(readTimeInMillis, metricAttributes); + uploadReadTimeInNanos.inc(readTimeInMillis); } public LongWithAttributes getShardSnapshotsInProgress() { @@ -108,7 +108,7 @@ public RepositoriesStats.SnapshotStats getSnapshotStats() { snapshotRateLimitingTimeInNanos.count(), numberOfBlobsUploaded.count(), numberOfBytesUploaded.count(), - uploadTimeInNanos.count(), + uploadTimeInMillis.count(), uploadReadTimeInNanos.count() ); } From 02d11fd5fdb33a6719d972a66efe5f5a8f80cef3 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 18:14:36 +0930 Subject: [PATCH 45/65] Write throttle time as nanos (can't use humanReadableField because it writes in millis) --- .../repositories/RepositoriesStats.java | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java index 9ba61df71631a..ca314a36ddc7f 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java @@ -97,16 +97,12 @@ public SnapshotStats(long totalReadThrottledNanos, long totalWriteThrottledNanos @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); - builder.humanReadableField( - "total_read_throttled_time_nanos", - "total_read_throttled_time", - new TimeValue(totalReadThrottledNanos, TimeUnit.NANOSECONDS) - ); - builder.humanReadableField( - "total_write_throttled_time_nanos", - "total_write_throttled_time", - new TimeValue(totalWriteThrottledNanos, TimeUnit.NANOSECONDS) - ); + if (builder.humanReadable()) { + builder.field("total_read_throttled_time", new TimeValue(totalReadThrottledNanos, TimeUnit.NANOSECONDS)); + builder.field("total_write_throttled_time", new 
TimeValue(totalWriteThrottledNanos, TimeUnit.NANOSECONDS)); + } + builder.field("total_read_throttled_time_nanos", totalReadThrottledNanos); + builder.field("total_write_throttled_time_nanos", totalWriteThrottledNanos); if (shardSnapshotsStarted != -1) { builder.field("shard_snapshots_started", shardSnapshotsStarted); } From 4a8fb9d31b17b6422172fe346877e3621aff7aee Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 18:19:01 +0930 Subject: [PATCH 46/65] Include unit in IndexShardSnapshotStatus#(startTime|totalTime) --- .../snapshots/IndexShardSnapshotStatus.java | 36 +++++++++---------- .../blobstore/BlobStoreSnapshotMetrics.java | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/snapshots/IndexShardSnapshotStatus.java b/server/src/main/java/org/elasticsearch/index/snapshots/IndexShardSnapshotStatus.java index d9b662b573ec2..2d429613438a1 100644 --- a/server/src/main/java/org/elasticsearch/index/snapshots/IndexShardSnapshotStatus.java +++ b/server/src/main/java/org/elasticsearch/index/snapshots/IndexShardSnapshotStatus.java @@ -88,8 +88,8 @@ public enum AbortStatus { private final AtomicReference stage; private final AtomicReference generation; private final AtomicReference shardSnapshotResult; // only set in stage DONE - private long startTime; - private long totalTime; + private long startTimeMillis; + private long totalTimeMillis; private int incrementalFileCount; private int totalFileCount; private int processedFileCount; @@ -102,8 +102,8 @@ public enum AbortStatus { private IndexShardSnapshotStatus( final Stage stage, - final long startTime, - final long totalTime, + final long startTimeMillis, + final long totalTimeMillis, final int incrementalFileCount, final int totalFileCount, final int processedFileCount, @@ -117,8 +117,8 @@ private IndexShardSnapshotStatus( this.stage = new AtomicReference<>(Objects.requireNonNull(stage)); this.generation = new AtomicReference<>(generation); 
this.shardSnapshotResult = new AtomicReference<>(); - this.startTime = startTime; - this.totalTime = totalTime; + this.startTimeMillis = startTimeMillis; + this.totalTimeMillis = totalTimeMillis; this.incrementalFileCount = incrementalFileCount; this.totalFileCount = totalFileCount; this.processedFileCount = processedFileCount; @@ -130,14 +130,14 @@ private IndexShardSnapshotStatus( } public synchronized Copy moveToStarted( - final long startTime, + final long startTimeMillis, final int incrementalFileCount, final int totalFileCount, final long incrementalSize, final long totalSize ) { if (stage.compareAndSet(Stage.INIT, Stage.STARTED)) { - this.startTime = startTime; + this.startTimeMillis = startTimeMillis; this.incrementalFileCount = incrementalFileCount; this.totalFileCount = totalFileCount; this.incrementalSize = incrementalSize; @@ -172,11 +172,11 @@ public synchronized Copy moveToFinalize() { }; } - public synchronized void moveToDone(final long endTime, final ShardSnapshotResult shardSnapshotResult) { + public synchronized void moveToDone(final long endTimeMillis, final ShardSnapshotResult shardSnapshotResult) { assert shardSnapshotResult != null; assert shardSnapshotResult.getGeneration() != null; if (stage.compareAndSet(Stage.FINALIZE, Stage.DONE)) { - this.totalTime = Math.max(0L, endTime - startTime); + this.totalTimeMillis = Math.max(0L, endTimeMillis - startTimeMillis); this.shardSnapshotResult.set(shardSnapshotResult); this.generation.set(shardSnapshotResult.getGeneration()); } else { @@ -191,8 +191,8 @@ public Stage getStage() { return stage.get(); } - public long getTotalTime() { - return totalTime; + public long getTotalTimeMillis() { + return totalTimeMillis; } public void addAbortListener(ActionListener listener) { @@ -225,7 +225,7 @@ private synchronized void abortAndMoveToStageIfNotCompleted( public synchronized SnapshotsInProgress.ShardState moveToUnsuccessful(final Stage newStage, final String failure, final long endTime) { assert newStage 
== Stage.PAUSED || newStage == Stage.FAILURE : newStage; if (newStage == Stage.PAUSED && stage.compareAndSet(Stage.PAUSING, Stage.PAUSED)) { - this.totalTime = Math.max(0L, endTime - startTime); + this.totalTimeMillis = Math.max(0L, endTime - startTimeMillis); this.failure = failure; return SnapshotsInProgress.ShardState.PAUSED_FOR_NODE_REMOVAL; } @@ -237,7 +237,7 @@ public synchronized SnapshotsInProgress.ShardState moveToUnsuccessful(final Stag public synchronized void moveToFailed(final long endTime, final String failure) { if (stage.getAndSet(Stage.FAILURE) != Stage.FAILURE) { abortListeners.onResponse(AbortStatus.NO_ABORT); - this.totalTime = Math.max(0L, endTime - startTime); + this.totalTimeMillis = Math.max(0L, endTime - startTimeMillis); this.failure = failure; } } @@ -297,8 +297,8 @@ public void updateStatusDescription(String statusString) { public synchronized IndexShardSnapshotStatus.Copy asCopy() { return new IndexShardSnapshotStatus.Copy( stage.get(), - startTime, - totalTime, + startTimeMillis, + totalTimeMillis, incrementalFileCount, totalFileCount, processedFileCount, @@ -471,9 +471,9 @@ public String toString() { + "stage=" + stage + ", startTime=" - + startTime + + startTimeMillis + ", totalTime=" - + totalTime + + totalTimeMillis + ", incrementalFileCount=" + incrementalFileCount + ", totalFileCount=" diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java index 16b7007fa041c..aa157085e612c 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java @@ -85,7 +85,7 @@ public void shardSnapshotStarted() { public void shardSnapshotCompleted(IndexShardSnapshotStatus status) { final Map attrsWithStage = Maps.copyMapWithAddedEntry(metricAttributes, "stage", 
status.getStage().name()); snapshotMetrics.shardsCompletedCounter().incrementBy(1, attrsWithStage); - snapshotMetrics.shardsDurationHistogram().record(status.getTotalTime() / 1_000f, attrsWithStage); + snapshotMetrics.shardsDurationHistogram().record(status.getTotalTimeMillis() / 1_000d, attrsWithStage); numberOfShardSnapshotsCompleted.inc(); shardSnapshotsInProgress.dec(); } From e344693d6f2f4d2596fdeec6aeb02202d2f1e644 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 18:39:11 +0930 Subject: [PATCH 47/65] Use VLong to encode extended fields, zero for BWC --- .../repositories/RepositoriesStats.java | 74 ++++++++----------- 1 file changed, 30 insertions(+), 44 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java index ca314a36ddc7f..1e6eb7ce659e3 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java @@ -75,15 +75,15 @@ public static SnapshotStats readFrom(StreamInput in) throws IOException { final long totalWriteThrottledNanos = in.readVLong(); if (in.getTransportVersion().onOrAfter(TransportVersions.EXTENDED_SNAPSHOT_STATS_IN_NODE_INFO)) { return new SnapshotStats( - in.readLong(), - in.readLong(), - in.readLong(), + in.readVLong(), + in.readVLong(), + in.readVLong(), totalReadThrottledNanos, totalWriteThrottledNanos, - in.readLong(), - in.readLong(), - in.readLong(), - in.readLong() + in.readVLong(), + in.readVLong(), + in.readVLong(), + in.readVLong() ); } else { return new SnapshotStats(totalReadThrottledNanos, totalWriteThrottledNanos); @@ -91,7 +91,7 @@ public static SnapshotStats readFrom(StreamInput in) throws IOException { } public SnapshotStats(long totalReadThrottledNanos, long totalWriteThrottledNanos) { - this(-1, -1, -1, totalReadThrottledNanos, totalWriteThrottledNanos, -1, -1, -1, -1); + this(0, 0, 
0, totalReadThrottledNanos, totalWriteThrottledNanos, 0, 0, 0, 0); } @Override @@ -103,35 +103,21 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } builder.field("total_read_throttled_time_nanos", totalReadThrottledNanos); builder.field("total_write_throttled_time_nanos", totalWriteThrottledNanos); - if (shardSnapshotsStarted != -1) { - builder.field("shard_snapshots_started", shardSnapshotsStarted); - } - if (shardSnapshotsCompleted != -1) { - builder.field("shard_snapshots_completed", shardSnapshotsCompleted); - } - if (shardSnapshotsInProgress != -1) { - builder.field("shard_snapshots_in_progress", shardSnapshotsInProgress); - } - if (numberOfBlobsUploaded != -1) { - builder.field("uploaded_blobs", numberOfBlobsUploaded); - } - if (numberOfBytesUploaded != -1) { - builder.humanReadableField("uploaded_size_in_bytes", "uploaded_size", ByteSizeValue.ofBytes(numberOfBytesUploaded)); - } - if (totalUploadTimeInMillis != -1) { - builder.humanReadableField( - "total_upload_time_in_millis", - "total_upload_time", - TimeValue.timeValueMillis(totalUploadTimeInMillis) - ); - } - if (totalUploadReadTimeInMillis != -1) { - builder.humanReadableField( - "total_read_time_in_millis", - "total_read_time", - TimeValue.timeValueMillis(totalUploadReadTimeInMillis) - ); - } + builder.field("shard_snapshots_started", shardSnapshotsStarted); + builder.field("shard_snapshots_completed", shardSnapshotsCompleted); + builder.field("shard_snapshots_in_progress", shardSnapshotsInProgress); + builder.field("uploaded_blobs", numberOfBlobsUploaded); + builder.humanReadableField("uploaded_size_in_bytes", "uploaded_size", ByteSizeValue.ofBytes(numberOfBytesUploaded)); + builder.humanReadableField( + "total_upload_time_in_millis", + "total_upload_time", + TimeValue.timeValueMillis(totalUploadTimeInMillis) + ); + builder.humanReadableField( + "total_read_time_in_millis", + "total_read_time", + TimeValue.timeValueMillis(totalUploadReadTimeInMillis) + ); 
builder.endObject(); return builder; } @@ -141,13 +127,13 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVLong(totalReadThrottledNanos); out.writeVLong(totalWriteThrottledNanos); if (out.getTransportVersion().onOrAfter(TransportVersions.EXTENDED_SNAPSHOT_STATS_IN_NODE_INFO)) { - out.writeLong(shardSnapshotsStarted); - out.writeLong(shardSnapshotsCompleted); - out.writeLong(shardSnapshotsInProgress); - out.writeLong(numberOfBlobsUploaded); - out.writeLong(numberOfBytesUploaded); - out.writeLong(totalUploadTimeInMillis); - out.writeLong(totalUploadReadTimeInMillis); + out.writeVLong(shardSnapshotsStarted); + out.writeVLong(shardSnapshotsCompleted); + out.writeVLong(shardSnapshotsInProgress); + out.writeVLong(numberOfBlobsUploaded); + out.writeVLong(numberOfBytesUploaded); + out.writeVLong(totalUploadTimeInMillis); + out.writeVLong(totalUploadReadTimeInMillis); } } } From ff821584bc5bb889042fc7d5f60bc758ef70693d Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 18:52:11 +0930 Subject: [PATCH 48/65] Remove default getShardSnapshotsInProgress --- .../org/elasticsearch/repositories/FilterRepository.java | 6 ++++++ .../org/elasticsearch/repositories/InvalidRepository.java | 6 ++++++ .../java/org/elasticsearch/repositories/Repository.java | 4 +--- .../elasticsearch/repositories/UnknownTypeRepository.java | 6 ++++++ .../repositories/RepositoriesServiceTests.java | 6 ++++++ .../elasticsearch/index/shard/RestoreOnlyRepository.java | 6 ++++++ .../elasticsearch/xpack/ccr/repository/CcrRepository.java | 6 ++++++ 7 files changed, 37 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java b/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java index b0a55d1f66411..5e9f77abe6075 100644 --- a/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java @@ -26,6 +26,7 @@ 
import org.elasticsearch.indices.recovery.RecoveryState; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotInfo; +import org.elasticsearch.telemetry.metric.LongWithAttributes; import java.io.IOException; import java.util.Collection; @@ -175,6 +176,11 @@ public void awaitIdle() { in.awaitIdle(); } + @Override + public LongWithAttributes getShardSnapshotsInProgress() { + return in.getShardSnapshotsInProgress(); + } + @Override public Lifecycle.State lifecycleState() { return in.lifecycleState(); diff --git a/server/src/main/java/org/elasticsearch/repositories/InvalidRepository.java b/server/src/main/java/org/elasticsearch/repositories/InvalidRepository.java index 37612065ebe58..6cf1a4bc4947d 100644 --- a/server/src/main/java/org/elasticsearch/repositories/InvalidRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/InvalidRepository.java @@ -25,6 +25,7 @@ import org.elasticsearch.indices.recovery.RecoveryState; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotInfo; +import org.elasticsearch.telemetry.metric.LongWithAttributes; import java.io.IOException; import java.util.Collection; @@ -180,6 +181,11 @@ public void awaitIdle() { } + @Override + public LongWithAttributes getShardSnapshotsInProgress() { + return null; + } + @Override protected void doStart() { diff --git a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index 3e0901d39cbde..e46f7e702387a 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -361,9 +361,7 @@ static boolean assertSnapshotMetaThread() { * @return The current number of shard snapshots in progress metric value, or null if this repository doesn't track that */ @Nullable - default LongWithAttributes getShardSnapshotsInProgress() { - return null; - } + 
LongWithAttributes getShardSnapshotsInProgress(); default RepositoriesStats.SnapshotStats getSnapshotStats() { return new RepositoriesStats.SnapshotStats(getRestoreThrottleTimeInNanos(), getSnapshotThrottleTimeInNanos()); diff --git a/server/src/main/java/org/elasticsearch/repositories/UnknownTypeRepository.java b/server/src/main/java/org/elasticsearch/repositories/UnknownTypeRepository.java index 9750666c8c8a9..7e523e09df22b 100644 --- a/server/src/main/java/org/elasticsearch/repositories/UnknownTypeRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/UnknownTypeRepository.java @@ -25,6 +25,7 @@ import org.elasticsearch.indices.recovery.RecoveryState; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotInfo; +import org.elasticsearch.telemetry.metric.LongWithAttributes; import java.io.IOException; import java.util.Collection; @@ -178,6 +179,11 @@ public void awaitIdle() { } + @Override + public LongWithAttributes getShardSnapshotsInProgress() { + return null; + } + @Override protected void doStart() { diff --git a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java index fec5869afd487..4ef19f2eb23ce 100644 --- a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java @@ -53,6 +53,7 @@ import org.elasticsearch.repositories.blobstore.MeteredBlobStoreRepository; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotInfo; +import org.elasticsearch.telemetry.metric.LongWithAttributes; import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; @@ -772,6 +773,11 @@ public void cloneShardSnapshot( @Override public void awaitIdle() {} + @Override + public 
LongWithAttributes getShardSnapshotsInProgress() { + return null; + } + @Override public Lifecycle.State lifecycleState() { return null; diff --git a/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java b/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java index a3e8f6e84600f..a3ed47ba5ed90 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java +++ b/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java @@ -31,6 +31,7 @@ import org.elasticsearch.repositories.SnapshotShardContext; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotInfo; +import org.elasticsearch.telemetry.metric.LongWithAttributes; import java.util.Collection; import java.util.Collections; @@ -175,4 +176,9 @@ public void cloneShardSnapshot( throw new UnsupportedOperationException("Unsupported for restore-only repository"); } + + @Override + public LongWithAttributes getShardSnapshotsInProgress() { + return null; + } } diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java index 469f4c4d81998..b596717d9634e 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java +++ b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java @@ -86,6 +86,7 @@ import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotInfo; import org.elasticsearch.snapshots.SnapshotState; +import org.elasticsearch.telemetry.metric.LongWithAttributes; import org.elasticsearch.threadpool.Scheduler; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.RemoteClusterService; @@ -618,6 +619,11 @@ public void cloneShardSnapshot( @Override public void awaitIdle() {} + @Override + public 
LongWithAttributes getShardSnapshotsInProgress() { + return null; + } + private void updateMappings( RemoteClusterClient leaderClient, Index leaderIndex, From 0d9a62d323d125be333b80474e5ad2625dc1222b Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 19:28:29 +0930 Subject: [PATCH 49/65] Remove default Repository#getSnapshotStats(), Repository#getSnapshotThrottleTimeInNanos() and Repository#getRestoreThrottleTimeInNanos() --- .../snapshots/RestoreSnapshotIT.java | 2 +- .../snapshots/SnapshotThrottlingIT.java | 6 ++++-- .../repositories/FilterRepository.java | 15 +++++---------- .../repositories/InvalidRepository.java | 15 +++++---------- .../repositories/RepositoriesStats.java | 2 ++ .../elasticsearch/repositories/Repository.java | 14 +------------- .../repositories/UnknownTypeRepository.java | 15 +++++---------- .../blobstore/BlobStoreRepository.java | 18 ++++-------------- .../repositories/RepositoriesServiceTests.java | 15 +++++---------- .../index/shard/RestoreOnlyRepository.java | 16 ++++++---------- .../xpack/ccr/CcrRepositoryIT.java | 2 +- .../xpack/ccr/repository/CcrRepository.java | 16 ++++++---------- .../SearchableSnapshotsIntegTests.java | 2 +- 13 files changed, 46 insertions(+), 92 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RestoreSnapshotIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RestoreSnapshotIT.java index 5db77fa1f0f42..953cddba0ab7a 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RestoreSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RestoreSnapshotIT.java @@ -639,7 +639,7 @@ public void testDynamicRestoreThrottling() throws Exception { assertBusy(() -> { long restorePause = 0L; for (RepositoriesService repositoriesService : internalCluster().getDataNodeInstances(RepositoriesService.class)) { - restorePause += repositoriesService.repository("test-repo").getRestoreThrottleTimeInNanos(); + 
restorePause += repositoriesService.repository("test-repo").getSnapshotStats().totalReadThrottledNanos(); } assertThat(restorePause, greaterThan(TimeValue.timeValueSeconds(randomIntBetween(1, 5)).nanos())); assertFalse(restoreSnapshotResponse.isDone()); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotThrottlingIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotThrottlingIT.java index 80ade63624675..ff74e5b0a86cf 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotThrottlingIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/SnapshotThrottlingIT.java @@ -15,6 +15,7 @@ import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.core.Tuple; import org.elasticsearch.repositories.RepositoriesService; +import org.elasticsearch.repositories.RepositoriesStats; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.MockLog; @@ -59,8 +60,9 @@ private Tuple testThrottledRepository(String maxSnapshotBytesPerSec, long snapshotPause = 0L; long restorePause = 0L; for (RepositoriesService repositoriesService : internalCluster().getDataNodeInstances(RepositoriesService.class)) { - snapshotPause += repositoriesService.repository("test-repo").getSnapshotThrottleTimeInNanos(); - restorePause += repositoriesService.repository("test-repo").getRestoreThrottleTimeInNanos(); + final RepositoriesStats.SnapshotStats snapshotStats = repositoriesService.repository("test-repo").getSnapshotStats(); + snapshotPause += snapshotStats.totalWriteThrottledNanos(); + restorePause += snapshotStats.totalReadThrottledNanos(); } cluster().wipeIndices("test2-idx"); logger.warn("--> tested throttled repository with snapshot pause [{}] and restore pause [{}]", snapshotPause, restorePause); diff --git a/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java 
b/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java index 5e9f77abe6075..abafef470cb8c 100644 --- a/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/FilterRepository.java @@ -98,16 +98,6 @@ public void deleteSnapshots( in.deleteSnapshots(snapshotIds, repositoryDataGeneration, minimumNodeVersion, repositoryDataUpdateListener, onCompletion); } - @Override - public long getSnapshotThrottleTimeInNanos() { - return in.getSnapshotThrottleTimeInNanos(); - } - - @Override - public long getRestoreThrottleTimeInNanos() { - return in.getRestoreThrottleTimeInNanos(); - } - @Override public String startVerification() { return in.startVerification(); @@ -181,6 +171,11 @@ public LongWithAttributes getShardSnapshotsInProgress() { return in.getShardSnapshotsInProgress(); } + @Override + public RepositoriesStats.SnapshotStats getSnapshotStats() { + return in.getSnapshotStats(); + } + @Override public Lifecycle.State lifecycleState() { return in.lifecycleState(); diff --git a/server/src/main/java/org/elasticsearch/repositories/InvalidRepository.java b/server/src/main/java/org/elasticsearch/repositories/InvalidRepository.java index 6cf1a4bc4947d..9bdaeb0933243 100644 --- a/server/src/main/java/org/elasticsearch/repositories/InvalidRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/InvalidRepository.java @@ -107,16 +107,6 @@ public void deleteSnapshots( repositoryDataUpdateListener.onFailure(createCreationException()); } - @Override - public long getSnapshotThrottleTimeInNanos() { - throw createCreationException(); - } - - @Override - public long getRestoreThrottleTimeInNanos() { - throw createCreationException(); - } - @Override public String startVerification() { throw createCreationException(); @@ -186,6 +176,11 @@ public LongWithAttributes getShardSnapshotsInProgress() { return null; } + @Override + public RepositoriesStats.SnapshotStats 
getSnapshotStats() { + throw createCreationException(); + } + @Override protected void doStart() { diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java index 1e6eb7ce659e3..f5dec24bd550e 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesStats.java @@ -70,6 +70,8 @@ public record SnapshotStats( long totalUploadReadTimeInMillis ) implements ToXContentObject, Writeable { + public static final SnapshotStats ZERO = new SnapshotStats(0, 0); + public static SnapshotStats readFrom(StreamInput in) throws IOException { final long totalReadThrottledNanos = in.readVLong(); final long totalWriteThrottledNanos = in.readVLong(); diff --git a/server/src/main/java/org/elasticsearch/repositories/Repository.java b/server/src/main/java/org/elasticsearch/repositories/Repository.java index e46f7e702387a..35dd7f1b00626 100644 --- a/server/src/main/java/org/elasticsearch/repositories/Repository.java +++ b/server/src/main/java/org/elasticsearch/repositories/Repository.java @@ -206,16 +206,6 @@ void deleteSnapshots( Runnable onCompletion ); - /** - * Returns snapshot throttle time in nanoseconds - */ - long getSnapshotThrottleTimeInNanos(); - - /** - * Returns restore throttle time in nanoseconds - */ - long getRestoreThrottleTimeInNanos(); - /** * Returns stats on the repository usage */ @@ -363,7 +353,5 @@ static boolean assertSnapshotMetaThread() { @Nullable LongWithAttributes getShardSnapshotsInProgress(); - default RepositoriesStats.SnapshotStats getSnapshotStats() { - return new RepositoriesStats.SnapshotStats(getRestoreThrottleTimeInNanos(), getSnapshotThrottleTimeInNanos()); - } + RepositoriesStats.SnapshotStats getSnapshotStats(); } diff --git a/server/src/main/java/org/elasticsearch/repositories/UnknownTypeRepository.java 
b/server/src/main/java/org/elasticsearch/repositories/UnknownTypeRepository.java index 7e523e09df22b..d77f7836d20bb 100644 --- a/server/src/main/java/org/elasticsearch/repositories/UnknownTypeRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/UnknownTypeRepository.java @@ -105,16 +105,6 @@ public void deleteSnapshots( repositoryDataUpdateListener.onFailure(createUnknownTypeException()); } - @Override - public long getSnapshotThrottleTimeInNanos() { - throw createUnknownTypeException(); - } - - @Override - public long getRestoreThrottleTimeInNanos() { - throw createUnknownTypeException(); - } - @Override public String startVerification() { throw createUnknownTypeException(); @@ -184,6 +174,11 @@ public LongWithAttributes getShardSnapshotsInProgress() { return null; } + @Override + public RepositoriesStats.SnapshotStats getSnapshotStats() { + throw createUnknownTypeException(); + } + @Override protected void doStart() { diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index b9fe812981559..59cc6032f1433 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -2203,16 +2203,6 @@ RateLimiter getRestoreRateLimiter() { ); } - @Override - public long getSnapshotThrottleTimeInNanos() { - return blobStoreSnapshotMetrics.snapshotRateLimitingTimeInNanos(); - } - - @Override - public long getRestoreThrottleTimeInNanos() { - return blobStoreSnapshotMetrics.restoreRateLimitingTimeInNanos(); - } - private void assertSnapshotOrStatelessPermittedThreadPool() { // The Stateless plugin adds custom thread pools for object store operations assert ThreadPool.assertCurrentThreadPool( @@ -3777,7 +3767,7 @@ private static InputStream maybeRateLimit( /** * Wrap the restore rate limiter 
(controlled by the repository setting `max_restore_bytes_per_sec` and the cluster setting * `indices.recovery.max_bytes_per_sec`) around the given stream. Any throttling is reported to the given listener and not otherwise - * recorded in the value returned by {@link BlobStoreRepository#getRestoreThrottleTimeInNanos}. + * recorded in the value returned by {@link RepositoriesStats.SnapshotStats#totalReadThrottledNanos()}. */ public InputStream maybeRateLimitRestores(InputStream stream) { return maybeRateLimitRestores(stream, blobStoreSnapshotMetrics::incrementRestoreRateLimitingTimeInNanos); @@ -3786,7 +3776,7 @@ public InputStream maybeRateLimitRestores(InputStream stream) { /** * Wrap the restore rate limiter (controlled by the repository setting `max_restore_bytes_per_sec` and the cluster setting * `indices.recovery.max_bytes_per_sec`) around the given stream. Any throttling is recorded in the value returned by {@link - * BlobStoreRepository#getRestoreThrottleTimeInNanos}. + * RepositoriesStats.SnapshotStats#totalReadThrottledNanos()}. */ public InputStream maybeRateLimitRestores(InputStream stream, RateLimitingInputStream.Listener throttleListener) { return maybeRateLimit( @@ -3798,7 +3788,7 @@ public InputStream maybeRateLimitRestores(InputStream stream, RateLimitingInputS /** * Wrap the snapshot rate limiter around the given stream. Any throttling is recorded in the value returned by - * {@link BlobStoreRepository#getSnapshotThrottleTimeInNanos()}. Note that speed is throttled by the repository setting + * {@link RepositoriesStats.SnapshotStats#totalWriteThrottledNanos()}. Note that speed is throttled by the repository setting * `max_snapshot_bytes_per_sec` and, if recovery node bandwidth settings have been set, additionally by the * `indices.recovery.max_bytes_per_sec` speed. */ @@ -3808,7 +3798,7 @@ public InputStream maybeRateLimitSnapshots(InputStream stream) { /** * Wrap the snapshot rate limiter around the given stream. 
Any throttling is recorded in the value returned by - * {@link BlobStoreRepository#getSnapshotThrottleTimeInNanos()}. Note that speed is throttled by the repository setting + * {@link RepositoriesStats.SnapshotStats#totalWriteThrottledNanos()}. Note that speed is throttled by the repository setting * `max_snapshot_bytes_per_sec` and, if recovery node bandwidth settings have been set, additionally by the * `indices.recovery.max_bytes_per_sec` speed. */ diff --git a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java index 4ef19f2eb23ce..1617001019ec3 100644 --- a/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java +++ b/server/src/test/java/org/elasticsearch/repositories/RepositoriesServiceTests.java @@ -702,16 +702,6 @@ public void deleteSnapshots( repositoryDataUpdateListener.onFailure(new UnsupportedOperationException()); } - @Override - public long getSnapshotThrottleTimeInNanos() { - return 0; - } - - @Override - public long getRestoreThrottleTimeInNanos() { - return 0; - } - @Override public String startVerification() { return null; @@ -778,6 +768,11 @@ public LongWithAttributes getShardSnapshotsInProgress() { return null; } + @Override + public RepositoriesStats.SnapshotStats getSnapshotStats() { + return RepositoriesStats.SnapshotStats.ZERO; + } + @Override public Lifecycle.State lifecycleState() { return null; diff --git a/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java b/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java index a3ed47ba5ed90..d7322cb337f7f 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java +++ b/test/framework/src/main/java/org/elasticsearch/index/shard/RestoreOnlyRepository.java @@ -22,6 +22,7 @@ import org.elasticsearch.repositories.FinalizeSnapshotContext; import 
org.elasticsearch.repositories.IndexId; import org.elasticsearch.repositories.IndexMetaDataGenerations; +import org.elasticsearch.repositories.RepositoriesStats; import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryData; import org.elasticsearch.repositories.RepositoryShardId; @@ -125,16 +126,6 @@ public void deleteSnapshots( repositoryDataUpdateListener.onFailure(new UnsupportedOperationException()); } - @Override - public long getSnapshotThrottleTimeInNanos() { - return 0; - } - - @Override - public long getRestoreThrottleTimeInNanos() { - return 0; - } - @Override public String startVerification() { return null; @@ -181,4 +172,9 @@ public void cloneShardSnapshot( public LongWithAttributes getShardSnapshotsInProgress() { return null; } + + @Override + public RepositoriesStats.SnapshotStats getSnapshotStats() { + return RepositoriesStats.SnapshotStats.ZERO; + } } diff --git a/x-pack/plugin/ccr/src/internalClusterTest/java/org/elasticsearch/xpack/ccr/CcrRepositoryIT.java b/x-pack/plugin/ccr/src/internalClusterTest/java/org/elasticsearch/xpack/ccr/CcrRepositoryIT.java index 7c47237d35bd5..bffcc39561b85 100644 --- a/x-pack/plugin/ccr/src/internalClusterTest/java/org/elasticsearch/xpack/ccr/CcrRepositoryIT.java +++ b/x-pack/plugin/ccr/src/internalClusterTest/java/org/elasticsearch/xpack/ccr/CcrRepositoryIT.java @@ -315,7 +315,7 @@ public void testRateLimitingIsEmployed() throws Exception { startRestore(clusterService, restoreService, restoreRequest).actionGet(); if (followerRateLimiting) { - assertTrue(repositories.stream().anyMatch(cr -> cr.getRestoreThrottleTimeInNanos() > 0)); + assertTrue(repositories.stream().anyMatch(cr -> cr.getSnapshotStats().totalReadThrottledNanos() > 0)); } else { assertTrue(restoreSources.stream().anyMatch(cr -> cr.getThrottleTime() > 0)); } diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java 
b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java index b596717d9634e..9cad087a34e0f 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java +++ b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/repository/CcrRepository.java @@ -73,6 +73,7 @@ import org.elasticsearch.repositories.FinalizeSnapshotContext; import org.elasticsearch.repositories.IndexId; import org.elasticsearch.repositories.IndexMetaDataGenerations; +import org.elasticsearch.repositories.RepositoriesStats; import org.elasticsearch.repositories.Repository; import org.elasticsearch.repositories.RepositoryData; import org.elasticsearch.repositories.RepositoryShardId; @@ -390,16 +391,6 @@ public void deleteSnapshots( repositoryDataUpdateListener.onFailure(new UnsupportedOperationException("Unsupported for repository of type: " + TYPE)); } - @Override - public long getSnapshotThrottleTimeInNanos() { - throw new UnsupportedOperationException("Unsupported for repository of type: " + TYPE); - } - - @Override - public long getRestoreThrottleTimeInNanos() { - return throttledTime.count(); - } - @Override public String startVerification() { throw new UnsupportedOperationException("Unsupported for repository of type: " + TYPE); @@ -624,6 +615,11 @@ public LongWithAttributes getShardSnapshotsInProgress() { return null; } + @Override + public RepositoriesStats.SnapshotStats getSnapshotStats() { + return new RepositoriesStats.SnapshotStats(throttledTime.count(), 0); + } + private void updateMappings( RemoteClusterClient leaderClient, Index leaderIndex, diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsIntegTests.java index 16543a30238b3..0fe63df106862 100644 --- 
a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsIntegTests.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsIntegTests.java @@ -496,7 +496,7 @@ public void testMaxRestoreBytesPerSecIsUsed() throws Exception { final IndicesService service = internalCluster().getInstance(IndicesService.class, node); if (service != null && service.hasIndex(restoredIndex)) { assertThat( - getRepositoryOnNode(repositoryName, node).getRestoreThrottleTimeInNanos(), + getRepositoryOnNode(repositoryName, node).getSnapshotStats().totalReadThrottledNanos(), useRateLimits ? greaterThan(0L) : equalTo(0L) ); } From b665a49aad835ce81f66c40a11bfde265b8d6332 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 19:45:14 +0930 Subject: [PATCH 50/65] Tidy up close listener creation --- .../elasticsearch/repositories/SnapshotShardContext.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/SnapshotShardContext.java b/server/src/main/java/org/elasticsearch/repositories/SnapshotShardContext.java index d0b2aca3774ec..42c91f8b7edf6 100644 --- a/server/src/main/java/org/elasticsearch/repositories/SnapshotShardContext.java +++ b/server/src/main/java/org/elasticsearch/repositories/SnapshotShardContext.java @@ -68,7 +68,8 @@ public SnapshotShardContext( final long snapshotStartTime, ActionListener listener ) { - super(createListener(commitRef.closingBefore(listener))); + super(new SubscribableListener<>()); + addListener(commitRef.closingBefore(listener)); this.store = store; this.mapperService = mapperService; this.snapshotId = snapshotId; @@ -80,12 +81,6 @@ public SnapshotShardContext( this.snapshotStartTime = snapshotStartTime; } - private static SubscribableListener createListener(ActionListener listener) { - final SubscribableListener 
objectSubscribableListener = new SubscribableListener<>(); - objectSubscribableListener.addListener(listener); - return objectSubscribableListener; - } - public Store store() { return store; } From 7d9b36dff005659a282dd61c46cbd7ce5887106a Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 19:55:49 +0930 Subject: [PATCH 51/65] Explain why we use seconds for duration histograms --- .../java/org/elasticsearch/repositories/SnapshotMetrics.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java index 2b3b43e689c2b..480b6ea58def3 100644 --- a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java @@ -58,9 +58,13 @@ public SnapshotMetrics(MeterRegistry meterRegistry) { this( meterRegistry.registerLongCounter(SNAPSHOTS_STARTED, "snapshots started", "unit"), meterRegistry.registerLongCounter(SNAPSHOTS_COMPLETED, "snapshots completed", "unit"), + // We use seconds rather than milliseconds due to the limitations of the default bucket boundaries + // see https://www.elastic.co/docs/reference/apm/agents/java/config-metrics#config-custom-metrics-histogram-boundaries meterRegistry.registerDoubleHistogram(SNAPSHOT_DURATION, "snapshots duration", "s"), meterRegistry.registerLongCounter(SNAPSHOT_SHARDS_STARTED, "shard snapshots started", "unit"), meterRegistry.registerLongCounter(SNAPSHOT_SHARDS_COMPLETED, "shard snapshots completed", "unit"), + // We use seconds rather than milliseconds due to the limitations of the default bucket boundaries + // see https://www.elastic.co/docs/reference/apm/agents/java/config-metrics#config-custom-metrics-histogram-boundaries meterRegistry.registerDoubleHistogram(SNAPSHOT_SHARDS_DURATION, "shard snapshots duration", "s"), meterRegistry.registerLongCounter(SNAPSHOT_BLOBS_UPLOADED, "snapshot 
blobs uploaded", "unit"), meterRegistry.registerLongCounter(SNAPSHOT_BYTES_UPLOADED, "snapshot bytes uploaded", "bytes"), From e9f32a0b8aa2510a1d2e4edb9fcdad81d4d9d41b Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 20:01:10 +0930 Subject: [PATCH 52/65] Track all snapshot shard statuses --- .../org/elasticsearch/snapshots/SnapshotsService.java | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index a9f01767b9439..6605b893c9d48 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -62,7 +62,6 @@ import org.elasticsearch.cluster.routing.RerouteService; import org.elasticsearch.cluster.routing.RoutingTable; import org.elasticsearch.cluster.routing.ShardRouting; -import org.elasticsearch.cluster.routing.allocation.decider.SnapshotInProgressAllocationDecider; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.cluster.service.MasterService; import org.elasticsearch.cluster.service.MasterServiceTaskQueue; @@ -169,16 +168,8 @@ public final class SnapshotsService extends AbstractLifecycleComponent implement /** * We publish metrics of how many shards are in each of the following states - * these should be the list of statuses that potentially block movement in - * {@link SnapshotInProgressAllocationDecider}, or states that might delay - * a snapshot's completion. 
*/ - private static final List TRACKED_SHARD_STATES = List.of( - ShardState.INIT, - ShardState.PAUSED_FOR_NODE_REMOVAL, - ShardState.WAITING, - ShardState.QUEUED - ); + private static final List TRACKED_SHARD_STATES = Arrays.asList(ShardState.values()); public static final String UPDATE_SNAPSHOT_STATUS_ACTION_NAME = "internal:cluster/snapshot/update_snapshot_status"; From ba62ff63e4dd914d575c8ffd48c2a8c4db1312cb Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Tue, 8 Jul 2025 20:06:38 +0930 Subject: [PATCH 53/65] Use com.carrotsearch.hppc.ObjectIntMap.addTo --- .../main/java/org/elasticsearch/snapshots/SnapshotsService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 6605b893c9d48..8e3b780053491 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -4505,7 +4505,7 @@ private Collection getShardsByState() { for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.forRepo(projectId, repository.name())) { for (ShardSnapshotStatus shardSnapshotStatus : snapshot.shards().values()) { if (shardCounts.containsKey(shardSnapshotStatus.state())) { - shardCounts.put(shardSnapshotStatus.state(), shardCounts.get(shardSnapshotStatus.state()) + 1); + shardCounts.addTo(shardSnapshotStatus.state(), 1); } } } From e90ff65f8bd2ac9c1d8f8a111a98e45a15bc01ec Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Wed, 9 Jul 2025 11:45:30 +0930 Subject: [PATCH 54/65] Pre-calculate shard & snapshot state summaries in cluster state --- .../cluster/SnapshotsInProgress.java | 50 ++++++++++++++++++- .../snapshots/SnapshotsService.java | 28 ++++------- 2 files changed, 58 insertions(+), 20 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java 
b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 155de1c889320..278fd91e0fb23 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -49,6 +49,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -58,6 +59,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.Stream; import static org.elasticsearch.repositories.ProjectRepo.PROJECT_REPO_SERIALIZER; @@ -178,6 +180,28 @@ public List forRepo(ProjectId projectId, String repository) { return forRepo(new ProjectRepo(projectId, repository)); } + /** + * Get a summary how many shards are in each {@link ShardState} for this repository + * + * @param projectId The project ID + * @param repository The repository name + * @return A map of each shard state to the count of shards in that state for all in-progress snapshots + */ + public Map shardStateSummaryForRepository(ProjectId projectId, String repository) { + return entries.getOrDefault(new ProjectRepo(projectId, repository), ByRepo.EMPTY).shardStateSummary; + } + + /** + * Get a summary how many snapshots are in each {@link State} for this repository + * + * @param projectId The project ID + * @param repository The repository name + * @return A map of each snapshot state to the count of in-progress snapshots in that state + */ + public Map snapshotStateSummaryForRepository(ProjectId projectId, String repository) { + return entries.getOrDefault(new ProjectRepo(projectId, repository), ByRepo.EMPTY).snapshotStateSummary; + } + /** * Returns the list of snapshots in the specified repository. 
*/ @@ -1875,7 +1899,9 @@ public void writeTo(StreamOutput out) throws IOException { * * @param entries all snapshots executing for a single repository */ - private record ByRepo(List entries) implements Diffable { + private record ByRepo(List entries, Map snapshotStateSummary, Map shardStateSummary) + implements + Diffable { static final ByRepo EMPTY = new ByRepo(List.of()); private static final DiffableUtils.NonDiffableValueSerializer INT_DIFF_VALUE_SERIALIZER = @@ -1892,7 +1918,27 @@ public Integer read(StreamInput in, String key) throws IOException { }; private ByRepo(List entries) { - this.entries = List.copyOf(entries); + this(List.copyOf(entries), calculateStateSummaries(entries)); + } + + private ByRepo(List entries, Tuple, Map> stateSummaries) { + this(entries, stateSummaries.v1(), stateSummaries.v2()); + } + + private static Tuple, Map> calculateStateSummaries(List entries) { + final int[] snapshotCounts = new int[State.values().length]; + final int[] shardCounts = new int[ShardState.values().length]; + for (Entry entry : entries) { + snapshotCounts[entry.state().ordinal()]++; + for (ShardSnapshotStatus shardSnapshotStatus : entry.shards().values()) { + shardCounts[shardSnapshotStatus.state().ordinal()]++; + } + } + final Map snapshotStates = Arrays.stream(State.values()) + .collect(Collectors.toUnmodifiableMap(state -> state, state -> snapshotCounts[state.ordinal()])); + final Map shardStates = Arrays.stream(ShardState.values()) + .collect(Collectors.toUnmodifiableMap(shardState -> shardState, state -> shardCounts[state.ordinal()])); + return Tuple.tuple(snapshotStates, shardStates); } @Override diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 8e3b780053491..288361ac06034 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -9,10 +9,6 
@@ package org.elasticsearch.snapshots; -import com.carrotsearch.hppc.ObjectIntHashMap; -import com.carrotsearch.hppc.ObjectIntMap; -import com.carrotsearch.hppc.cursors.ObjectIntCursor; - import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -4496,25 +4492,21 @@ private Collection getShardsByState() { } final SnapshotsInProgress snapshotsInProgress = SnapshotsInProgress.get(currentState); final List shardsByState = new ArrayList<>(); - final ObjectIntMap shardCounts = new ObjectIntHashMap<>(TRACKED_SHARD_STATES.size()); + currentState.metadata().projects().forEach((projectId, project) -> { final RepositoriesMetadata repositoriesMetadata = RepositoriesMetadata.get(project); if (repositoriesMetadata != null) { for (RepositoryMetadata repository : repositoriesMetadata.repositories()) { - TRACKED_SHARD_STATES.forEach(shardState -> shardCounts.put(shardState, 0)); - for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.forRepo(projectId, repository.name())) { - for (ShardSnapshotStatus shardSnapshotStatus : snapshot.shards().values()) { - if (shardCounts.containsKey(shardSnapshotStatus.state())) { - shardCounts.addTo(shardSnapshotStatus.state(), 1); - } - } - } + final Map shardStateSummary = snapshotsInProgress.shardStateSummaryForRepository( + projectId, + repository.name() + ); final Map attributesMap = SnapshotMetrics.createAttributesMap(projectId, repository); - for (ObjectIntCursor entry : shardCounts) { - shardsByState.add( - new LongWithAttributes(entry.value, Maps.copyMapWithAddedEntry(attributesMap, "state", entry.key.name())) - ); - } + shardStateSummary.forEach( + (shardState, count) -> shardsByState.add( + new LongWithAttributes(count, Maps.copyMapWithAddedEntry(attributesMap, "state", shardState.name())) + ) + ); } } }); From a43ac7e049aa9652fa8f133e34003e6124a05b53 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 10 Jul 2025 10:03:46 +0930 Subject: [PATCH 55/65] Don't 
try and get shards for clone entry --- .../java/org/elasticsearch/cluster/SnapshotsInProgress.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index 278fd91e0fb23..e25ba0e7c8ef4 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -1930,6 +1930,10 @@ private static Tuple, Map> calculateSta final int[] shardCounts = new int[ShardState.values().length]; for (Entry entry : entries) { snapshotCounts[entry.state().ordinal()]++; + if (entry.isClone()) { + // Can't get shards for clone entry + continue; + } for (ShardSnapshotStatus shardSnapshotStatus : entry.shards().values()) { shardCounts[shardSnapshotStatus.state().ordinal()]++; } From 093ae226528fbf479b3962b86690a2575174378b Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 10 Jul 2025 12:17:54 +0930 Subject: [PATCH 56/65] Add snapshots by state metric --- .../repositories/SnapshotMetricsIT.java | 78 +++++++++++++------ .../repositories/SnapshotMetrics.java | 16 ++-- .../snapshots/SnapshotsService.java | 30 ++++++- 3 files changed, 90 insertions(+), 34 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index 732d128655021..71284b08c6729 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -41,6 +41,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.CyclicBarrier; +import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -225,7 +226,7 @@ public void 
testSnapshotAPMMetrics() throws Exception { assertMetricsHaveAttributes(InstrumentType.LONG_COUNTER, SnapshotMetrics.SNAPSHOT_BLOBS_UPLOADED, expectedAttrs); } - public void testShardsByStateCounts_InitAndQueued() throws Exception { + public void testByStateCounts_InitAndQueuedShards() throws Exception { final String indexName = randomIdentifier(); final int numShards = randomIntBetween(2, 10); final int numReplicas = randomIntBetween(0, 1); @@ -247,9 +248,11 @@ public void testShardsByStateCounts_InitAndQueued() throws Exception { waitForBlockOnAnyDataNode(repositoryName); - // Should be {numShards} in INIT state + // Should be {numShards} in INIT state, and 1 STARTED snapshot Map shardStates = getShardStates(); assertThat(shardStates.get(SnapshotsInProgress.ShardState.INIT), equalTo((long) numShards)); + Map snapshotStates = getSnapshotStates(); + assertThat(snapshotStates.get(SnapshotsInProgress.State.STARTED), equalTo(1L)); // Queue up another snapshot clusterAdmin().prepareCreateSnapshot(TEST_REQUEST_TIMEOUT, repositoryName, randomIdentifier()) @@ -257,9 +260,12 @@ public void testShardsByStateCounts_InitAndQueued() throws Exception { .setWaitForCompletion(false) .get(); - // Should be {numShards} in QUEUED state + // Should be {numShards} in QUEUED and INIT states, and 2 STARTED snapshots shardStates = getShardStates(); + assertThat(shardStates.get(SnapshotsInProgress.ShardState.INIT), equalTo((long) numShards)); assertThat(shardStates.get(SnapshotsInProgress.ShardState.QUEUED), equalTo((long) numShards)); + snapshotStates = getSnapshotStates(); + assertThat(snapshotStates.get(SnapshotsInProgress.State.STARTED), equalTo(2L)); } finally { unblockAllDataNodes(repositoryName); } @@ -267,16 +273,22 @@ public void testShardsByStateCounts_InitAndQueued() throws Exception { // All statuses should return to zero when the snapshots complete awaitNumberOfSnapshotsInProgress(0); getShardStates().forEach((key, value) -> assertThat(value, equalTo(0L))); + 
getSnapshotStates().forEach((key, value) -> assertThat(value, equalTo(0L))); // Ensure all common attributes are present assertMetricsHaveAttributes( InstrumentType.LONG_GAUGE, - SnapshotMetrics.SNAPSHOT_SHARDS_BY_STATUS, + SnapshotMetrics.SNAPSHOT_SHARDS_BY_STATE, + Map.of("project_id", ProjectId.DEFAULT.id(), "repo_name", repositoryName, "repo_type", "mock") + ); + assertMetricsHaveAttributes( + InstrumentType.LONG_GAUGE, + SnapshotMetrics.SNAPSHOTS_BY_STATE, Map.of("project_id", ProjectId.DEFAULT.id(), "repo_name", repositoryName, "repo_type", "mock") ); } - public void testShardsByStateCounts_PausedForRemoval() throws Exception { + public void testByStateCounts_PausedForRemovalShards() throws Exception { final String indexName = randomIdentifier(); final int numShards = randomIntBetween(2, 10); final int numReplicas = randomIntBetween(0, 1); @@ -321,6 +333,8 @@ public void testShardsByStateCounts_PausedForRemoval() throws Exception { final Map shardStates = getShardStates(); assertThat(shardStates.get(SnapshotsInProgress.ShardState.PAUSED_FOR_NODE_REMOVAL), equalTo((long) numShards)); + final Map snapshotStates = getSnapshotStates(); + assertThat(snapshotStates.get(SnapshotsInProgress.State.STARTED), equalTo(1L)); // clear shutdown metadata to allow snapshot to complete clearShutdownMetadata(clusterService); @@ -328,16 +342,22 @@ public void testShardsByStateCounts_PausedForRemoval() throws Exception { // All statuses should return to zero when the snapshot completes awaitNumberOfSnapshotsInProgress(0); getShardStates().forEach((key, value) -> assertThat(value, equalTo(0L))); + getSnapshotStates().forEach((key, value) -> assertThat(value, equalTo(0L))); // Ensure all common attributes are present assertMetricsHaveAttributes( InstrumentType.LONG_GAUGE, - SnapshotMetrics.SNAPSHOT_SHARDS_BY_STATUS, + SnapshotMetrics.SNAPSHOT_SHARDS_BY_STATE, + Map.of("project_id", ProjectId.DEFAULT.id(), "repo_name", repositoryName, "repo_type", "mock") + ); + 
assertMetricsHaveAttributes( + InstrumentType.LONG_GAUGE, + SnapshotMetrics.SNAPSHOTS_BY_STATE, Map.of("project_id", ProjectId.DEFAULT.id(), "repo_name", repositoryName, "repo_type", "mock") ); } - public void testShardsByStateCounts_Waiting() throws Exception { + public void testByStateCounts_WaitingShards() throws Exception { final String indexName = randomIdentifier(); final String boundNode = internalCluster().startDataOnlyNode(); final String destinationNode = internalCluster().startDataOnlyNode(); @@ -386,9 +406,11 @@ public void testShardsByStateCounts_Waiting() throws Exception { // Wait till we see a shard in WAITING state createSnapshotInStateListener(clusterService(), repositoryName, indexName, 1, SnapshotsInProgress.ShardState.WAITING); - // Metrics should have a shard in waiting state + // Metrics should have 1 WAITING shard and 1 STARTED snapshot final Map shardStates = getShardStates(); assertThat(shardStates.get(SnapshotsInProgress.ShardState.WAITING), equalTo(1L)); + final Map snapshotStates = getSnapshotStates(); + assertThat(snapshotStates.get(SnapshotsInProgress.State.STARTED), equalTo(1L)); // allow the relocation to complete safeAwait(handoffRequestBarrier); @@ -396,11 +418,17 @@ public void testShardsByStateCounts_Waiting() throws Exception { // All statuses should return to zero when the snapshot completes awaitNumberOfSnapshotsInProgress(0); getShardStates().forEach((key, value) -> assertThat(value, equalTo(0L))); + getSnapshotStates().forEach((key, value) -> assertThat(value, equalTo(0L))); // Ensure all common attributes are present assertMetricsHaveAttributes( InstrumentType.LONG_GAUGE, - SnapshotMetrics.SNAPSHOT_SHARDS_BY_STATUS, + SnapshotMetrics.SNAPSHOT_SHARDS_BY_STATE, + Map.of("project_id", ProjectId.DEFAULT.id(), "repo_name", repositoryName, "repo_type", "mock") + ); + assertMetricsHaveAttributes( + InstrumentType.LONG_GAUGE, + SnapshotMetrics.SNAPSHOTS_BY_STATE, Map.of("project_id", ProjectId.DEFAULT.id(), "repo_name", 
repositoryName, "repo_type", "mock") ); } @@ -408,9 +436,9 @@ public void testShardsByStateCounts_Waiting() throws Exception { private Map getShardStates() { collectMetrics(); - return allTestTelemetryPlugins().map(testTelemetryPlugin -> { + return allTestTelemetryPlugins().flatMap(testTelemetryPlugin -> { final List longGaugeMeasurement = testTelemetryPlugin.getLongGaugeMeasurement( - SnapshotMetrics.SNAPSHOT_SHARDS_BY_STATUS + SnapshotMetrics.SNAPSHOT_SHARDS_BY_STATE ); final Map shardStates = new HashMap<>(); // last one in wins @@ -420,22 +448,22 @@ private Map getShardStates() { measurement.getLong() ); } - return shardStates; - }).reduce(Map.of(), this::combineCounts); + return shardStates.entrySet().stream(); + }).collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue, Long::sum)); } - private Map combineCounts( - Map lhs, - Map rhs - ) { - final Map result = new HashMap<>(); - Stream.of(lhs, rhs) - .forEach( - countMap -> countMap.forEach( - (status, count) -> result.compute(status, (state, current) -> current == null ? 
count : current + count) - ) - ); - return result; + private Map getSnapshotStates() { + collectMetrics(); + + return allTestTelemetryPlugins().flatMap(testTelemetryPlugin -> { + final List longGaugeMeasurement = testTelemetryPlugin.getLongGaugeMeasurement(SnapshotMetrics.SNAPSHOTS_BY_STATE); + final Map shardStates = new HashMap<>(); + // last one in wins + for (Measurement measurement : longGaugeMeasurement) { + shardStates.put(SnapshotsInProgress.State.valueOf(measurement.attributes().get("state").toString()), measurement.getLong()); + } + return shardStates.entrySet().stream(); + }).collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue, Long::sum)); } private static void assertMetricsHaveAttributes( diff --git a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java index 480b6ea58def3..cc1875caa55a1 100644 --- a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java @@ -41,11 +41,12 @@ public record SnapshotMetrics( public static final String SNAPSHOTS_STARTED = "es.repositories.snapshots.started.total"; public static final String SNAPSHOTS_COMPLETED = "es.repositories.snapshots.completed.total"; public static final String SNAPSHOTS_IN_PROGRESS = "es.repositories.snapshots.current"; + public static final String SNAPSHOTS_BY_STATE = "es.repositories.snapshots.by_state.current"; public static final String SNAPSHOT_DURATION = "es.repositories.snapshots.duration.histogram"; public static final String SNAPSHOT_SHARDS_STARTED = "es.repositories.snapshots.shards.started.total"; public static final String SNAPSHOT_SHARDS_COMPLETED = "es.repositories.snapshots.shards.completed.total"; public static final String SNAPSHOT_SHARDS_IN_PROGRESS = "es.repositories.snapshots.shards.current"; - public static final String SNAPSHOT_SHARDS_BY_STATUS = 
"es.repositories.snapshots.shards.by_status.current"; + public static final String SNAPSHOT_SHARDS_BY_STATE = "es.repositories.snapshots.shards.by_state.current"; public static final String SNAPSHOT_SHARDS_DURATION = "es.repositories.snapshots.shards.duration.histogram"; public static final String SNAPSHOT_BLOBS_UPLOADED = "es.repositories.snapshots.blobs.uploaded.total"; public static final String SNAPSHOT_BYTES_UPLOADED = "es.repositories.snapshots.upload.bytes.total"; @@ -89,13 +90,12 @@ public void createSnapshotsInProgressMetric(Supplier> shardSnapshotsByStatusObserver) { - meterRegistry.registerLongsGauge( - SNAPSHOT_SHARDS_BY_STATUS, - "snapshotting shards by (potentially movement-blocking) status", - "unit", - shardSnapshotsByStatusObserver - ); + public void createSnapshotShardsByStateMetric(Supplier> shardSnapshotsByStatusObserver) { + meterRegistry.registerLongsGauge(SNAPSHOT_SHARDS_BY_STATE, "snapshotting shards by state", "unit", shardSnapshotsByStatusObserver); + } + + public void createSnapshotsByStateMetric(Supplier> snapshotsByStatusObserver) { + meterRegistry.registerLongsGauge(SNAPSHOTS_BY_STATE, "snapshots by state", "unit", snapshotsByStatusObserver); } public static Map createAttributesMap(ProjectId projectId, RepositoryMetadata meta) { diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 288361ac06034..fd23f03bcc800 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -248,7 +248,8 @@ public SnapshotsService( this.transportService = transportService; this.snapshotMetrics = snapshotMetrics; snapshotMetrics.createSnapshotsInProgressMetric(this::getSnapshotsInProgress); - snapshotMetrics.createSnapshotShardsByStatusMetric(this::getShardsByState); + snapshotMetrics.createSnapshotShardsByStateMetric(this::getShardsByState); + 
snapshotMetrics.createSnapshotsByStateMetric(this::getSnapshotsByState); // The constructor of UpdateSnapshotStatusAction will register itself to the TransportService. this.updateSnapshotStatusHandler = new UpdateSnapshotStatusAction(transportService, clusterService, threadPool, actionFilters); @@ -4513,6 +4514,33 @@ private Collection getShardsByState() { return shardsByState; } + private Collection getSnapshotsByState() { + final ClusterState currentState = clusterService.state(); + // Only the master should report on snapshots-by-state + if (currentState.nodes().isLocalNodeElectedMaster() == false) { + return List.of(); + } + final SnapshotsInProgress snapshotsInProgress = SnapshotsInProgress.get(currentState); + final List snapshotsByState = new ArrayList<>(); + + currentState.metadata().projects().forEach((projectId, project) -> { + final RepositoriesMetadata repositoriesMetadata = RepositoriesMetadata.get(project); + if (repositoriesMetadata != null) { + for (RepositoryMetadata repository : repositoriesMetadata.repositories()) { + final Map snapshotStateSummary = snapshotsInProgress + .snapshotStateSummaryForRepository(projectId, repository.name()); + final Map attributesMap = SnapshotMetrics.createAttributesMap(projectId, repository); + snapshotStateSummary.forEach( + (snapshotState, count) -> snapshotsByState.add( + new LongWithAttributes(count, Maps.copyMapWithAddedEntry(attributesMap, "state", snapshotState.name())) + ) + ); + } + } + }); + return snapshotsByState; + } + private record UpdateNodeIdsForRemovalTask() implements ClusterStateTaskListener { @Override public void onFailure(Exception e) { From f5096438c0831379fb16beb7099e7fd738226811 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 10 Jul 2025 12:51:42 +0930 Subject: [PATCH 57/65] Remove remnants of limited state tracking --- .../java/org/elasticsearch/snapshots/SnapshotsService.java | 5 ----- 1 file changed, 5 deletions(-) diff --git 
a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index fd23f03bcc800..cea49676b7bae 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -162,11 +162,6 @@ public final class SnapshotsService extends AbstractLifecycleComponent implement private static final Logger logger = LogManager.getLogger(SnapshotsService.class); - /** - * We publish metrics of how many shards are in each of the following states - */ - private static final List TRACKED_SHARD_STATES = Arrays.asList(ShardState.values()); - public static final String UPDATE_SNAPSHOT_STATUS_ACTION_NAME = "internal:cluster/snapshot/update_snapshot_status"; public static final String NO_FEATURE_STATES_VALUE = "none"; From 3ac413c28392174d95d6f987857072447e307b10 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 10 Jul 2025 12:59:54 +0930 Subject: [PATCH 58/65] Remove redundant snapshots in progress metric --- .../repositories/SnapshotMetricsIT.java | 17 -------------- .../repositories/SnapshotMetrics.java | 5 ---- .../snapshots/SnapshotsService.java | 23 ------------------- 3 files changed, 45 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java index 71284b08c6729..91cf76c6f07e4 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/repositories/SnapshotMetricsIT.java @@ -122,7 +122,6 @@ public void testSnapshotAPMMetrics() throws Exception { waitForBlockOnAnyDataNode(repositoryName); collectMetrics(); - assertSnapshotsInProgressMetricIs(greaterThan(0L)); assertShardsInProgressMetricIs(hasItem(greaterThan(0L))); 
assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOTS_STARTED), equalTo(1L)); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOTS_COMPLETED), equalTo(0L)); @@ -174,7 +173,6 @@ public void testSnapshotAPMMetrics() throws Exception { assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_SHARDS_STARTED), equalTo((long) numShards)); assertThat(getTotalClusterLongCounterValue(SnapshotMetrics.SNAPSHOT_SHARDS_COMPLETED), equalTo((long) numShards)); - assertSnapshotsInProgressMetricIs(equalTo(0L)); assertShardsInProgressMetricIs(everyItem(equalTo(0L))); // Restore the snapshot @@ -209,7 +207,6 @@ public void testSnapshotAPMMetrics() throws Exception { SnapshotState.SUCCESS.name() ); assertMetricsHaveAttributes(InstrumentType.LONG_COUNTER, SnapshotMetrics.SNAPSHOTS_STARTED, expectedAttrs); - assertMetricsHaveAttributes(InstrumentType.LONG_GAUGE, SnapshotMetrics.SNAPSHOTS_IN_PROGRESS, expectedAttrs); assertMetricsHaveAttributes(InstrumentType.LONG_COUNTER, SnapshotMetrics.SNAPSHOTS_COMPLETED, expectedAttrsWithSnapshotState); assertMetricsHaveAttributes(InstrumentType.DOUBLE_HISTOGRAM, SnapshotMetrics.SNAPSHOT_DURATION, expectedAttrsWithSnapshotState); @@ -507,20 +504,6 @@ private static void assertShardsInProgressMetricIs(Matcher> m assertThat(values, matcher); } - private static void assertSnapshotsInProgressMetricIs(Matcher matcher) { - final List values = internalCluster().getCurrentMasterNodeInstance(PluginsService.class) - .filterPlugins(TestTelemetryPlugin.class) - .map(testTelemetryPlugin -> { - final List longGaugeMeasurement = testTelemetryPlugin.getLongGaugeMeasurement( - SnapshotMetrics.SNAPSHOTS_IN_PROGRESS - ); - return longGaugeMeasurement.getLast().getLong(); - }) - .toList(); - assertThat(values, hasSize(1)); - assertThat(values.getFirst(), matcher); - } - private static void collectMetrics() { allTestTelemetryPlugins().forEach(TestTelemetryPlugin::collect); } diff --git 
a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java index cc1875caa55a1..9281851d4e6b9 100644 --- a/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/SnapshotMetrics.java @@ -40,7 +40,6 @@ public record SnapshotMetrics( public static final String SNAPSHOTS_STARTED = "es.repositories.snapshots.started.total"; public static final String SNAPSHOTS_COMPLETED = "es.repositories.snapshots.completed.total"; - public static final String SNAPSHOTS_IN_PROGRESS = "es.repositories.snapshots.current"; public static final String SNAPSHOTS_BY_STATE = "es.repositories.snapshots.by_state.current"; public static final String SNAPSHOT_DURATION = "es.repositories.snapshots.duration.histogram"; public static final String SNAPSHOT_SHARDS_STARTED = "es.repositories.snapshots.shards.started.total"; @@ -86,10 +85,6 @@ public void createSnapshotShardsInProgressMetric(Supplier> snapshotsInProgressObserver) { - meterRegistry.registerLongsGauge(SNAPSHOTS_IN_PROGRESS, "snapshots in progress", "unit", snapshotsInProgressObserver); - } - public void createSnapshotShardsByStateMetric(Supplier> shardSnapshotsByStatusObserver) { meterRegistry.registerLongsGauge(SNAPSHOT_SHARDS_BY_STATE, "snapshotting shards by state", "unit", shardSnapshotsByStatusObserver); } diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index cea49676b7bae..cf76ede45ab8a 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -242,7 +242,6 @@ public SnapshotsService( this.threadPool = transportService.getThreadPool(); this.transportService = transportService; this.snapshotMetrics = snapshotMetrics; - 
snapshotMetrics.createSnapshotsInProgressMetric(this::getSnapshotsInProgress); snapshotMetrics.createSnapshotShardsByStateMetric(this::getShardsByState); snapshotMetrics.createSnapshotsByStateMetric(this::getSnapshotsByState); @@ -4458,28 +4457,6 @@ private SnapshotsInProgress createSnapshot( } } - private Collection getSnapshotsInProgress() { - final ClusterState currentState = clusterService.state(); - // Only the master should report on snapshots-in-progress - if (currentState.nodes().isLocalNodeElectedMaster() == false) { - return List.of(); - } - final SnapshotsInProgress snapshotsInProgress = SnapshotsInProgress.get(currentState); - final List snapshotsInProgressMetrics = new ArrayList<>(); - currentState.metadata().projects().forEach((projectId, project) -> { - final RepositoriesMetadata repositoriesMetadata = RepositoriesMetadata.get(project); - if (repositoriesMetadata != null) { - repositoriesMetadata.repositories().forEach(repository -> { - int snapshotCount = snapshotsInProgress.forRepo(projectId, repository.name()).size(); - snapshotsInProgressMetrics.add( - new LongWithAttributes(snapshotCount, SnapshotMetrics.createAttributesMap(projectId, repository)) - ); - }); - } - }); - return snapshotsInProgressMetrics; - } - private Collection getShardsByState() { final ClusterState currentState = clusterService.state(); // Only the master should report on shards-by-state From 1dec741f3aa6a6494dfc5b123c3adc1210ea5a2e Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 10 Jul 2025 13:23:55 +0930 Subject: [PATCH 59/65] Populate and assert on all snapshotStats fields --- .../cluster/node/stats/NodeStatsTests.java | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java index 34f9395fd415b..8a89ce1048872 100644 --- 
a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -78,6 +78,7 @@ import org.elasticsearch.search.suggest.completion.CompletionStats; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.VersionUtils; +import org.elasticsearch.test.XContentTestUtils; import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.threadpool.ThreadPoolStats; import org.elasticsearch.transport.TransportActionStats; @@ -468,11 +469,13 @@ public void testSerialization() throws IOException { assertNotSame(scriptCacheStats, deserializedScriptCacheStats); } - RepositoriesStats repoThrottlingStats = deserializedNodeStats.getRepositoriesStats(); - assertTrue(repoThrottlingStats.getRepositorySnapshotStats().containsKey("test-repository")); - assertEquals(100, repoThrottlingStats.getRepositorySnapshotStats().get("test-repository").totalReadThrottledNanos()); - assertEquals(200, repoThrottlingStats.getRepositorySnapshotStats().get("test-repository").totalWriteThrottledNanos()); - + RepositoriesStats repoSnapshotStats = deserializedNodeStats.getRepositoriesStats(); + assertTrue(repoSnapshotStats.getRepositorySnapshotStats().containsKey("test-repository")); + RepositoriesStats.SnapshotStats expectedSnapshotStats = nodeStats.getRepositoriesStats() + .getRepositorySnapshotStats() + .get("test-repository"); + RepositoriesStats.SnapshotStats actualSnapshotStats = repoSnapshotStats.getRepositorySnapshotStats().get("test-repository"); + assertEquals(XContentTestUtils.convertToMap(expectedSnapshotStats), XContentTestUtils.convertToMap(actualSnapshotStats)); } } } @@ -1069,7 +1072,20 @@ public static NodeStats createNodeStats() { ); } RepositoriesStats repositoriesStats = new RepositoriesStats( - Map.of("test-repository", new RepositoriesStats.SnapshotStats(100, 200)) + Map.of( + "test-repository", + new 
RepositoriesStats.SnapshotStats( + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong() + ) + ) ); NodeAllocationStats nodeAllocationStats = new NodeAllocationStats( randomIntBetween(0, 10000), From 850c1169ee1d0b044dea14146b066495214cd85d Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 10 Jul 2025 14:09:13 +0930 Subject: [PATCH 60/65] Fix flakiness in RepositorySnapshotStatsIT, remove dead code --- .../snapshots/RepositorySnapshotStatsIT.java | 10 ++++++++++ .../blobstore/BlobStoreSnapshotMetrics.java | 8 -------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java index 1d2ad3835e5fe..23ffefba79ab5 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java @@ -21,6 +21,7 @@ import java.util.Collections; +import static org.elasticsearch.threadpool.ThreadPool.ESTIMATED_TIME_INTERVAL_SETTING; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.lessThan; @@ -28,6 +29,15 @@ @ESIntegTestCase.ClusterScope(numDataNodes = 0, scope = ESIntegTestCase.Scope.TEST) public class RepositorySnapshotStatsIT extends AbstractSnapshotIntegTestCase { + @Override + protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal, otherSettings)) + // Make upload time more accurate + .put(ESTIMATED_TIME_INTERVAL_SETTING.getKey(), "0s") + .build(); + } + public void testRepositorySnapshotStats() { 
logger.info("--> starting a node"); diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java index aa157085e612c..3ccf7e658f1e6 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreSnapshotMetrics.java @@ -51,19 +51,11 @@ public void incrementSnapshotRateLimitingTimeInNanos(long throttleTimeNanos) { snapshotRateLimitingTimeInNanos.inc(throttleTimeNanos); } - public long snapshotRateLimitingTimeInNanos() { - return snapshotRateLimitingTimeInNanos.count(); - } - public void incrementRestoreRateLimitingTimeInNanos(long throttleTimeNanos) { snapshotMetrics.restoreThrottleDurationCounter().incrementBy(throttleTimeNanos, metricAttributes); restoreRateLimitingTimeInNanos.inc(throttleTimeNanos); } - public long restoreRateLimitingTimeInNanos() { - return restoreRateLimitingTimeInNanos.count(); - } - public void incrementCountersForPartUpload(long partSizeInBytes, long partWriteTimeMillis) { snapshotMetrics.bytesUploadedCounter().incrementBy(partSizeInBytes, metricAttributes); snapshotMetrics.uploadDurationCounter().incrementBy(partWriteTimeMillis, metricAttributes); From ca9d1abd6336b3f0cbd01e0f34b768715d850099 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Thu, 10 Jul 2025 14:55:28 +0930 Subject: [PATCH 61/65] Fix assertion --- .../elasticsearch/snapshots/RepositorySnapshotStatsIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java index 23ffefba79ab5..e7a1360ead1c2 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java +++ 
b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/RepositorySnapshotStatsIT.java @@ -24,7 +24,7 @@ import static org.elasticsearch.threadpool.ThreadPool.ESTIMATED_TIME_INTERVAL_SETTING; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; -import static org.hamcrest.Matchers.lessThan; +import static org.hamcrest.Matchers.lessThanOrEqualTo; @ESIntegTestCase.ClusterScope(numDataNodes = 0, scope = ESIntegTestCase.Scope.TEST) public class RepositorySnapshotStatsIT extends AbstractSnapshotIntegTestCase { @@ -97,6 +97,6 @@ public void testRepositorySnapshotStats() { assertThat(snapshotStats.numberOfBytesUploaded(), greaterThan(0L)); assertThat(snapshotStats.totalUploadTimeInMillis(), greaterThan(0L)); assertThat(snapshotStats.totalUploadReadTimeInMillis(), greaterThan(0L)); - assertThat(snapshotStats.totalUploadReadTimeInMillis(), lessThan(snapshotStats.totalUploadTimeInMillis())); + assertThat(snapshotStats.totalUploadReadTimeInMillis(), lessThanOrEqualTo(snapshotStats.totalUploadTimeInMillis())); } } From 87864ecdfc2c2196dbb3b9443d09d0e40c776049 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Wed, 16 Jul 2025 10:12:19 +1000 Subject: [PATCH 62/65] Calculate snapshot stats on metrics thread and only when stale --- .../cluster/SnapshotsInProgress.java | 46 ++++++------ .../snapshots/SnapshotsService.java | 70 +++++++++++-------- 2 files changed, 62 insertions(+), 54 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java index e25ba0e7c8ef4..9012d685b25f7 100644 --- a/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/elasticsearch/cluster/SnapshotsInProgress.java @@ -71,6 +71,11 @@ public class SnapshotsInProgress extends AbstractNamedDiffable implement private static final Logger logger = LogManager.getLogger(SnapshotsInProgress.class); + private 
static final Tuple, Map> NO_SNAPSHOTS_IN_PROGRESS_STATS = Tuple.tuple( + Arrays.stream(State.values()).collect(Collectors.toUnmodifiableMap(v -> v, v -> 0)), + Arrays.stream(ShardState.values()).collect(Collectors.toUnmodifiableMap(v -> v, v -> 0)) + ); + public static final SnapshotsInProgress EMPTY = new SnapshotsInProgress(Map.of(), Set.of()); public static final String TYPE = "snapshots"; @@ -181,25 +186,19 @@ public List forRepo(ProjectId projectId, String repository) { } /** - * Get a summary how many shards are in each {@link ShardState} for this repository - * - * @param projectId The project ID - * @param repository The repository name - * @return A map of each shard state to the count of shards in that state for all in-progress snapshots - */ - public Map shardStateSummaryForRepository(ProjectId projectId, String repository) { - return entries.getOrDefault(new ProjectRepo(projectId, repository), ByRepo.EMPTY).shardStateSummary; - } - - /** - * Get a summary how many snapshots are in each {@link State} for this repository + * Calculate snapshot and shard state summaries for this repository * * @param projectId The project ID * @param repository The repository name - * @return A map of each snapshot state to the count of in-progress snapshots in that state + * @return A tuple containing the snapshot and shard stat summaries */ - public Map snapshotStateSummaryForRepository(ProjectId projectId, String repository) { - return entries.getOrDefault(new ProjectRepo(projectId, repository), ByRepo.EMPTY).snapshotStateSummary; + public Tuple, Map> shardStateSummaryForRepository(ProjectId projectId, String repository) { + ByRepo byRepo = entries.get(new ProjectRepo(projectId, repository)); + if (byRepo != null) { + return byRepo.calculateStateSummaries(); + } else { + return NO_SNAPSHOTS_IN_PROGRESS_STATS; + } } /** @@ -1899,9 +1898,7 @@ public void writeTo(StreamOutput out) throws IOException { * * @param entries all snapshots executing for a single repository */ - 
private record ByRepo(List entries, Map snapshotStateSummary, Map shardStateSummary) - implements - Diffable { + private record ByRepo(List entries) implements Diffable { static final ByRepo EMPTY = new ByRepo(List.of()); private static final DiffableUtils.NonDiffableValueSerializer INT_DIFF_VALUE_SERIALIZER = @@ -1918,14 +1915,15 @@ public Integer read(StreamInput in, String key) throws IOException { }; private ByRepo(List entries) { - this(List.copyOf(entries), calculateStateSummaries(entries)); - } - - private ByRepo(List entries, Tuple, Map> stateSummaries) { - this(entries, stateSummaries.v1(), stateSummaries.v2()); + this.entries = List.copyOf(entries); } - private static Tuple, Map> calculateStateSummaries(List entries) { + /** + * Calculate summaries of how many shards and snapshots are in each shard/snapshot state + * + * @return a {@link Tuple} containing the snapshot and shard state summaries respectively + */ + public Tuple, Map> calculateStateSummaries() { final int[] snapshotCounts = new int[State.values().length]; final int[] shardCounts = new int[ShardState.values().length]; for (Entry entry : entries) { diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index cf76ede45ab8a..374a1a2e064b3 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -208,6 +208,8 @@ public final class SnapshotsService extends AbstractLifecycleComponent implement private final ShardSnapshotUpdateCompletionHandler shardSnapshotUpdateCompletionHandler; + private SnapshotStats snapshotStats; + /** * Setting that specifies the maximum number of allowed concurrent snapshot create and delete operations in the * cluster state. 
The number of concurrent operations in a cluster state is defined as the sum of @@ -4463,27 +4465,7 @@ private Collection getShardsByState() { if (currentState.nodes().isLocalNodeElectedMaster() == false) { return List.of(); } - final SnapshotsInProgress snapshotsInProgress = SnapshotsInProgress.get(currentState); - final List shardsByState = new ArrayList<>(); - - currentState.metadata().projects().forEach((projectId, project) -> { - final RepositoriesMetadata repositoriesMetadata = RepositoriesMetadata.get(project); - if (repositoriesMetadata != null) { - for (RepositoryMetadata repository : repositoriesMetadata.repositories()) { - final Map shardStateSummary = snapshotsInProgress.shardStateSummaryForRepository( - projectId, - repository.name() - ); - final Map attributesMap = SnapshotMetrics.createAttributesMap(projectId, repository); - shardStateSummary.forEach( - (shardState, count) -> shardsByState.add( - new LongWithAttributes(count, Maps.copyMapWithAddedEntry(attributesMap, "state", shardState.name())) - ) - ); - } - } - }); - return shardsByState; + return recalculateIfStale(currentState).shardStateMetrics(); } private Collection getSnapshotsByState() { @@ -4492,25 +4474,44 @@ private Collection getSnapshotsByState() { if (currentState.nodes().isLocalNodeElectedMaster() == false) { return List.of(); } + return recalculateIfStale(currentState).snapshotStateMetrics(); + } + + private SnapshotStats recalculateIfStale(ClusterState currentState) { + if (snapshotStats == null || Objects.equals(snapshotStats.fromClusterState(), currentState.stateUUID()) == false) { + snapshotStats = recalculateSnapshotStats(currentState); + } + return snapshotStats; + } + + private SnapshotStats recalculateSnapshotStats(ClusterState currentState) { final SnapshotsInProgress snapshotsInProgress = SnapshotsInProgress.get(currentState); - final List snapshotsByState = new ArrayList<>(); + final List snapshotStateMetrics = new ArrayList<>(); + final List shardStateMetrics = new 
ArrayList<>(); currentState.metadata().projects().forEach((projectId, project) -> { final RepositoriesMetadata repositoriesMetadata = RepositoriesMetadata.get(project); if (repositoriesMetadata != null) { for (RepositoryMetadata repository : repositoriesMetadata.repositories()) { - final Map snapshotStateSummary = snapshotsInProgress - .snapshotStateSummaryForRepository(projectId, repository.name()); + final Tuple, Map> stateSummaries = snapshotsInProgress + .shardStateSummaryForRepository(projectId, repository.name()); final Map attributesMap = SnapshotMetrics.createAttributesMap(projectId, repository); - snapshotStateSummary.forEach( - (snapshotState, count) -> snapshotsByState.add( - new LongWithAttributes(count, Maps.copyMapWithAddedEntry(attributesMap, "state", snapshotState.name())) - ) - ); + stateSummaries.v1() + .forEach( + (snapshotState, count) -> snapshotStateMetrics.add( + new LongWithAttributes(count, Maps.copyMapWithAddedEntry(attributesMap, "state", snapshotState.name())) + ) + ); + stateSummaries.v2() + .forEach( + (shardState, count) -> shardStateMetrics.add( + new LongWithAttributes(count, Maps.copyMapWithAddedEntry(attributesMap, "state", shardState.name())) + ) + ); } } }); - return snapshotsByState; + return new SnapshotStats(currentState.stateUUID(), snapshotStateMetrics, shardStateMetrics); } private record UpdateNodeIdsForRemovalTask() implements ClusterStateTaskListener { @@ -4544,4 +4545,13 @@ private static boolean supportsNodeRemovalTracking(ClusterState clusterState) { } private final MasterServiceTaskQueue updateNodeIdsToRemoveQueue; + + /** + * A cached copy of the snapshot and shard state metrics + */ + private record SnapshotStats( + String fromClusterState, + Collection snapshotStateMetrics, + Collection shardStateMetrics + ) {} } From 9bea854496827b1a2199db71e1a87ed9bce8177d Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Wed, 16 Jul 2025 12:45:31 +1000 Subject: [PATCH 63/65] Align toString with field names, add units to 
IndexShardSnapshotStatus.Copy fields/toString --- .../status/SnapshotIndexShardStatus.java | 4 +-- .../snapshots/IndexShardSnapshotStatus.java | 28 +++++++++---------- .../blobstore/BlobStoreRepository.java | 4 +-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/SnapshotIndexShardStatus.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/SnapshotIndexShardStatus.java index c745199627026..bf4f8afd302a3 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/SnapshotIndexShardStatus.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/SnapshotIndexShardStatus.java @@ -68,8 +68,8 @@ public SnapshotIndexShardStatus(StreamInput in) throws IOException { default -> throw new IllegalArgumentException("Unknown stage type " + indexShardStatus.getStage()); }; this.stats = new SnapshotStats( - indexShardStatus.getStartTime(), - indexShardStatus.getTotalTime(), + indexShardStatus.getStartTimeMillis(), + indexShardStatus.getTotalTimeMillis(), indexShardStatus.getIncrementalFileCount(), indexShardStatus.getTotalFileCount(), indexShardStatus.getProcessedFileCount(), diff --git a/server/src/main/java/org/elasticsearch/index/snapshots/IndexShardSnapshotStatus.java b/server/src/main/java/org/elasticsearch/index/snapshots/IndexShardSnapshotStatus.java index 2d429613438a1..23e63d5447088 100644 --- a/server/src/main/java/org/elasticsearch/index/snapshots/IndexShardSnapshotStatus.java +++ b/server/src/main/java/org/elasticsearch/index/snapshots/IndexShardSnapshotStatus.java @@ -354,8 +354,8 @@ public static IndexShardSnapshotStatus.Copy newDone( public static class Copy { private final Stage stage; - private final long startTime; - private final long totalTime; + private final long startTimeMillis; + private final long totalTimeMillis; private final int incrementalFileCount; private final int 
totalFileCount; private final int processedFileCount; @@ -379,8 +379,8 @@ public Copy( final String statusDescription ) { this.stage = stage; - this.startTime = startTime; - this.totalTime = totalTime; + this.startTimeMillis = startTime; + this.totalTimeMillis = totalTime; this.incrementalFileCount = incrementalFileCount; this.totalFileCount = totalFileCount; this.processedFileCount = processedFileCount; @@ -395,12 +395,12 @@ public Stage getStage() { return stage; } - public long getStartTime() { - return startTime; + public long getStartTimeMillis() { + return startTimeMillis; } - public long getTotalTime() { - return totalTime; + public long getTotalTimeMillis() { + return totalTimeMillis; } public int getIncrementalFileCount() { @@ -440,10 +440,10 @@ public String toString() { return "index shard snapshot status (" + "stage=" + stage - + ", startTime=" - + startTime - + ", totalTime=" - + totalTime + + ", startTimeMillis=" + + startTimeMillis + + ", totalTimeMillis=" + + totalTimeMillis + ", incrementalFileCount=" + incrementalFileCount + ", totalFileCount=" @@ -470,9 +470,9 @@ public String toString() { return "index shard snapshot status (" + "stage=" + stage - + ", startTime=" + + ", startTimeMillis=" + startTimeMillis - + ", totalTime=" + + ", totalTimeMillis=" + totalTimeMillis + ", incrementalFileCount=" + incrementalFileCount diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 59cc6032f1433..54ff971eb2425 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -3467,8 +3467,8 @@ private void doSnapshotShard(SnapshotShardContext context) { final BlobStoreIndexShardSnapshot blobStoreIndexShardSnapshot = new BlobStoreIndexShardSnapshot( snapshotId.getName(), 
indexCommitPointFiles, - lastSnapshotStatus.getStartTime(), - threadPool.absoluteTimeInMillis() - lastSnapshotStatus.getStartTime(), + lastSnapshotStatus.getStartTimeMillis(), + threadPool.absoluteTimeInMillis() - lastSnapshotStatus.getStartTimeMillis(), lastSnapshotStatus.getIncrementalFileCount(), lastSnapshotStatus.getIncrementalSize() ); From 74c3427d395065e279c5579741402fc6f3e5255f Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Wed, 16 Jul 2025 13:24:07 +1000 Subject: [PATCH 64/65] Only recalculate stats if SnapshotsInProgress changed --- .../elasticsearch/snapshots/SnapshotsService.java | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 374a1a2e064b3..8c2baa06608b0 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -4478,7 +4478,9 @@ private Collection getSnapshotsByState() { } private SnapshotStats recalculateIfStale(ClusterState currentState) { - if (snapshotStats == null || Objects.equals(snapshotStats.fromClusterState(), currentState.stateUUID()) == false) { + if (snapshotStats == null + || (Objects.equals(snapshotStats.clusterStateId(), currentState.stateUUID()) == false + && System.identityHashCode(SnapshotsInProgress.get(currentState)) != snapshotStats.snapshotsInProgressIdentityHashcode())) { snapshotStats = recalculateSnapshotStats(currentState); } return snapshotStats; @@ -4511,7 +4513,12 @@ private SnapshotStats recalculateSnapshotStats(ClusterState currentState) { } } }); - return new SnapshotStats(currentState.stateUUID(), snapshotStateMetrics, shardStateMetrics); + return new SnapshotStats( + currentState.stateUUID(), + System.identityHashCode(snapshotsInProgress), + snapshotStateMetrics, + shardStateMetrics + ); } private record 
UpdateNodeIdsForRemovalTask() implements ClusterStateTaskListener { @@ -4550,7 +4557,8 @@ private static boolean supportsNodeRemovalTracking(ClusterState clusterState) { * A cached copy of the snapshot and shard state metrics */ private record SnapshotStats( - String fromClusterState, + String clusterStateId, + int snapshotsInProgressIdentityHashcode, Collection snapshotStateMetrics, Collection shardStateMetrics ) {} From 346c15f47b0581c69f4c485286ce71fceb431403 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Wed, 16 Jul 2025 16:14:41 +1000 Subject: [PATCH 65/65] Tidy up, naming --- .../snapshots/SnapshotsService.java | 49 +++++++++++++------ 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 8c2baa06608b0..bfe0e9f7895eb 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -208,7 +208,7 @@ public final class SnapshotsService extends AbstractLifecycleComponent implement private final ShardSnapshotUpdateCompletionHandler shardSnapshotUpdateCompletionHandler; - private SnapshotStats snapshotStats; + private CachedSnapshotStateMetrics cachedSnapshotStateMetrics; /** * Setting that specifies the maximum number of allowed concurrent snapshot create and delete operations in the @@ -4477,16 +4477,14 @@ private Collection getSnapshotsByState() { return recalculateIfStale(currentState).snapshotStateMetrics(); } - private SnapshotStats recalculateIfStale(ClusterState currentState) { - if (snapshotStats == null - || (Objects.equals(snapshotStats.clusterStateId(), currentState.stateUUID()) == false - && System.identityHashCode(SnapshotsInProgress.get(currentState)) != snapshotStats.snapshotsInProgressIdentityHashcode())) { - snapshotStats = recalculateSnapshotStats(currentState); + private 
CachedSnapshotStateMetrics recalculateIfStale(ClusterState currentState) { + if (cachedSnapshotStateMetrics == null || cachedSnapshotStateMetrics.isStale(currentState)) { + cachedSnapshotStateMetrics = recalculateSnapshotStats(currentState); } - return snapshotStats; + return cachedSnapshotStateMetrics; } - private SnapshotStats recalculateSnapshotStats(ClusterState currentState) { + private CachedSnapshotStateMetrics recalculateSnapshotStats(ClusterState currentState) { final SnapshotsInProgress snapshotsInProgress = SnapshotsInProgress.get(currentState); final List snapshotStateMetrics = new ArrayList<>(); final List shardStateMetrics = new ArrayList<>(); @@ -4513,12 +4511,7 @@ private SnapshotStats recalculateSnapshotStats(ClusterState currentState) { } } }); - return new SnapshotStats( - currentState.stateUUID(), - System.identityHashCode(snapshotsInProgress), - snapshotStateMetrics, - shardStateMetrics - ); + return new CachedSnapshotStateMetrics(currentState, snapshotStateMetrics, shardStateMetrics); } private record UpdateNodeIdsForRemovalTask() implements ClusterStateTaskListener { @@ -4556,10 +4549,34 @@ private static boolean supportsNodeRemovalTracking(ClusterState clusterState) { /** * A cached copy of the snapshot and shard state metrics */ - private record SnapshotStats( + private record CachedSnapshotStateMetrics( String clusterStateId, int snapshotsInProgressIdentityHashcode, Collection snapshotStateMetrics, Collection shardStateMetrics - ) {} + ) { + CachedSnapshotStateMetrics( + ClusterState sourceState, + Collection snapshotStateMetrics, + Collection shardStateMetrics + ) { + this( + sourceState.stateUUID(), + System.identityHashCode(SnapshotsInProgress.get(sourceState)), + snapshotStateMetrics, + shardStateMetrics + ); + } + + /** + * Are these metrics stale? 
+ * + * @param currentClusterState The current cluster state + * @return true if these metrics were calculated from a prior cluster state and need to be recalculated, false otherwise + */ + public boolean isStale(ClusterState currentClusterState) { + return (Objects.equals(clusterStateId, currentClusterState.stateUUID()) == false + && System.identityHashCode(SnapshotsInProgress.get(currentClusterState)) != snapshotsInProgressIdentityHashcode); + } + } }