@@ -87,6 +87,8 @@ public class RemoteClusterStateService implements Closeable {
87
87
88
88
public static final TimeValue GLOBAL_METADATA_UPLOAD_TIMEOUT_DEFAULT = TimeValue .timeValueMillis (20000 );
89
89
90
+ public static final TimeValue METADATA_MANIFEST_UPLOAD_TIMEOUT_DEFAULT = TimeValue .timeValueMillis (20000 );
91
+
90
92
public static final Setting <TimeValue > INDEX_METADATA_UPLOAD_TIMEOUT_SETTING = Setting .timeSetting (
91
93
"cluster.remote_store.state.index_metadata.upload_timeout" ,
92
94
INDEX_METADATA_UPLOAD_TIMEOUT_DEFAULT ,
@@ -101,6 +103,13 @@ public class RemoteClusterStateService implements Closeable {
101
103
Setting .Property .NodeScope
102
104
);
103
105
106
+ public static final Setting <TimeValue > METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING = Setting .timeSetting (
107
+ "cluster.remote_store.state.metadata_manifest.upload_timeout" ,
108
+ METADATA_MANIFEST_UPLOAD_TIMEOUT_DEFAULT ,
109
+ Setting .Property .Dynamic ,
110
+ Setting .Property .NodeScope
111
+ );
112
+
104
113
public static final ChecksumBlobStoreFormat <IndexMetadata > INDEX_METADATA_FORMAT = new ChecksumBlobStoreFormat <>(
105
114
"index-metadata" ,
106
115
METADATA_NAME_FORMAT ,
@@ -157,6 +166,7 @@ public class RemoteClusterStateService implements Closeable {
157
166
158
167
private volatile TimeValue indexMetadataUploadTimeout ;
159
168
private volatile TimeValue globalMetadataUploadTimeout ;
169
+ private volatile TimeValue metadataManifestUploadTimeout ;
160
170
161
171
private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean (false );
162
172
private final RemotePersistenceStats remoteStateStats ;
@@ -190,9 +200,11 @@ public RemoteClusterStateService(
190
200
this .slowWriteLoggingThreshold = clusterSettings .get (SLOW_WRITE_LOGGING_THRESHOLD );
191
201
this .indexMetadataUploadTimeout = clusterSettings .get (INDEX_METADATA_UPLOAD_TIMEOUT_SETTING );
192
202
this .globalMetadataUploadTimeout = clusterSettings .get (GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING );
203
+ this .metadataManifestUploadTimeout = clusterSettings .get (METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING );
193
204
clusterSettings .addSettingsUpdateConsumer (SLOW_WRITE_LOGGING_THRESHOLD , this ::setSlowWriteLoggingThreshold );
194
205
clusterSettings .addSettingsUpdateConsumer (INDEX_METADATA_UPLOAD_TIMEOUT_SETTING , this ::setIndexMetadataUploadTimeout );
195
206
clusterSettings .addSettingsUpdateConsumer (GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING , this ::setGlobalMetadataUploadTimeout );
207
+ clusterSettings .addSettingsUpdateConsumer (METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING , this ::setMetadataManifestUploadTimeout );
196
208
this .remoteStateStats = new RemotePersistenceStats ();
197
209
}
198
210
@@ -401,21 +413,21 @@ private String writeGlobalMetadata(ClusterState clusterState) throws IOException
401
413
try {
402
414
if (latch .await (getGlobalMetadataUploadTimeout ().millis (), TimeUnit .MILLISECONDS ) == false ) {
403
415
// TODO: We should add metrics where transfer is timing out. [Issue: #10687]
404
- GlobalMetadataTransferException ex = new GlobalMetadataTransferException (
416
+ RemoteStateTransferException ex = new RemoteStateTransferException (
405
417
String .format (Locale .ROOT , "Timed out waiting for transfer of global metadata to complete" )
406
418
);
407
419
throw ex ;
408
420
}
409
421
} catch (InterruptedException ex ) {
410
- GlobalMetadataTransferException exception = new GlobalMetadataTransferException (
422
+ RemoteStateTransferException exception = new RemoteStateTransferException (
411
423
String .format (Locale .ROOT , "Timed out waiting for transfer of global metadata to complete - %s" ),
412
424
ex
413
425
);
414
426
Thread .currentThread ().interrupt ();
415
427
throw exception ;
416
428
}
417
429
if (exceptionReference .get () != null ) {
418
- throw new GlobalMetadataTransferException (exceptionReference .get ().getMessage (), exceptionReference .get ());
430
+ throw new RemoteStateTransferException (exceptionReference .get ().getMessage (), exceptionReference .get ());
419
431
}
420
432
return result .get ();
421
433
}
@@ -440,7 +452,7 @@ private List<UploadedIndexMetadata> writeIndexMetadataParallel(ClusterState clus
440
452
);
441
453
result .add (uploadedIndexMetadata );
442
454
}, ex -> {
443
- assert ex instanceof IndexMetadataTransferException ;
455
+ assert ex instanceof RemoteStateTransferException ;
444
456
logger .error (
445
457
() -> new ParameterizedMessage ("Exception during transfer of IndexMetadata to Remote {}" , ex .getMessage ()),
446
458
ex
@@ -457,7 +469,7 @@ private List<UploadedIndexMetadata> writeIndexMetadataParallel(ClusterState clus
457
469
458
470
try {
459
471
if (latch .await (getIndexMetadataUploadTimeout ().millis (), TimeUnit .MILLISECONDS ) == false ) {
460
- IndexMetadataTransferException ex = new IndexMetadataTransferException (
472
+ RemoteStateTransferException ex = new RemoteStateTransferException (
461
473
String .format (
462
474
Locale .ROOT ,
463
475
"Timed out waiting for transfer of index metadata to complete - %s" ,
@@ -469,7 +481,7 @@ private List<UploadedIndexMetadata> writeIndexMetadataParallel(ClusterState clus
469
481
}
470
482
} catch (InterruptedException ex ) {
471
483
exceptionList .forEach (ex ::addSuppressed );
472
- IndexMetadataTransferException exception = new IndexMetadataTransferException (
484
+ RemoteStateTransferException exception = new RemoteStateTransferException (
473
485
String .format (
474
486
Locale .ROOT ,
475
487
"Timed out waiting for transfer of index metadata to complete - %s" ,
@@ -481,7 +493,7 @@ private List<UploadedIndexMetadata> writeIndexMetadataParallel(ClusterState clus
481
493
throw exception ;
482
494
}
483
495
if (exceptionList .size () > 0 ) {
484
- IndexMetadataTransferException exception = new IndexMetadataTransferException (
496
+ RemoteStateTransferException exception = new RemoteStateTransferException (
485
497
String .format (
486
498
Locale .ROOT ,
487
499
"Exception during transfer of IndexMetadata to Remote %s" ,
@@ -520,7 +532,7 @@ private void writeIndexMetadataAsync(
520
532
indexMetadataContainer .path ().buildAsString () + indexMetadataFilename
521
533
)
522
534
),
523
- ex -> latchedActionListener .onFailure (new IndexMetadataTransferException (indexMetadata .getIndex ().toString (), ex ))
535
+ ex -> latchedActionListener .onFailure (new RemoteStateTransferException (indexMetadata .getIndex ().toString (), ex ))
524
536
);
525
537
526
538
INDEX_METADATA_FORMAT .writeAsyncWithUrgentPriority (
@@ -601,14 +613,45 @@ private ClusterMetadataManifest uploadManifest(
601
613
602
614
private void writeMetadataManifest (String clusterName , String clusterUUID , ClusterMetadataManifest uploadManifest , String fileName )
603
615
throws IOException {
616
+ AtomicReference <String > result = new AtomicReference <String >();
617
+ AtomicReference <Exception > exceptionReference = new AtomicReference <Exception >();
618
+
604
619
final BlobContainer metadataManifestContainer = manifestContainer (clusterName , clusterUUID );
605
- CLUSTER_METADATA_MANIFEST_FORMAT .write (
620
+
621
+ // latch used to block until the upload has finished
622
+ CountDownLatch latch = new CountDownLatch (1 );
623
+
624
+ LatchedActionListener completionListener = new LatchedActionListener <>(ActionListener .wrap (resp -> {
625
+ logger .trace (String .format (Locale .ROOT , "Manifest file uploaded successfully." ));
626
+ }, ex -> { exceptionReference .set (ex ); }), latch );
627
+
628
+ CLUSTER_METADATA_MANIFEST_FORMAT .writeAsyncWithUrgentPriority (
606
629
uploadManifest ,
607
630
metadataManifestContainer ,
608
631
fileName ,
609
632
blobStoreRepository .getCompressor (),
633
+ completionListener ,
610
634
FORMAT_PARAMS
611
635
);
636
+
637
// Block until the completion listener counts the latch down, or until the configured
// manifest upload timeout elapses, whichever happens first.
try {
    if (latch.await(getMetadataManifestUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) {
        throw new RemoteStateTransferException("Timed out waiting for transfer of manifest file to complete");
    }
} catch (InterruptedException ex) {
    // Restore the interrupt flag before anything else so it can never be lost.
    Thread.currentThread().interrupt();
    // The message template contains a %s placeholder, so an argument must be supplied:
    // String.format throws MissingFormatArgumentException otherwise, which would mask the
    // interruption with an unrelated exception. The manifest file name is used for it.
    // NOTE(review): the wording still says "Timed out" although this path is an interruption;
    // it is kept unchanged in case anything matches on the existing message prefix.
    throw new RemoteStateTransferException(
        String.format(Locale.ROOT, "Timed out waiting for transfer of manifest file to complete - %s", fileName),
        ex
    );
}
652
+ if (exceptionReference .get () != null ) {
653
+ throw new RemoteStateTransferException (exceptionReference .get ().getMessage (), exceptionReference .get ());
654
+ }
612
655
logger .debug (
613
656
"Metadata manifest file [{}] written during [{}] phase. " ,
614
657
fileName ,
@@ -668,6 +711,10 @@ private void setGlobalMetadataUploadTimeout(TimeValue newGlobalMetadataUploadTim
668
711
this .globalMetadataUploadTimeout = newGlobalMetadataUploadTimeout ;
669
712
}
670
713
714
+ private void setMetadataManifestUploadTimeout (TimeValue newMetadataManifestUploadTimeout ) {
715
+ this .metadataManifestUploadTimeout = newMetadataManifestUploadTimeout ;
716
+ }
717
+
671
718
public TimeValue getIndexMetadataUploadTimeout () {
672
719
return this .indexMetadataUploadTimeout ;
673
720
}
@@ -676,6 +723,10 @@ public TimeValue getGlobalMetadataUploadTimeout() {
676
723
return this .globalMetadataUploadTimeout ;
677
724
}
678
725
726
+ public TimeValue getMetadataManifestUploadTimeout () {
727
+ return this .metadataManifestUploadTimeout ;
728
+ }
729
+
679
730
static String getManifestFileName (long term , long version , boolean committed ) {
680
731
// 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest__<inverted_term>__<inverted_version>__C/P__<inverted__timestamp>__<codec_version>
681
732
return String .join (
@@ -1088,29 +1139,15 @@ public void writeMetadataFailed() {
1088
1139
}
1089
1140
1090
1141
/**
1091
- * Exception for IndexMetadata transfer failures to remote
1092
- */
1093
- static class IndexMetadataTransferException extends RuntimeException {
1094
-
1095
- public IndexMetadataTransferException (String errorDesc ) {
1096
- super (errorDesc );
1097
- }
1098
-
1099
- public IndexMetadataTransferException (String errorDesc , Throwable cause ) {
1100
- super (errorDesc , cause );
1101
- }
1102
- }
1103
-
1104
- /**
1105
- * Exception for GlobalMetadata transfer failures to remote
1142
+ * Exception for Remote state transfer.
1106
1143
*/
1107
- static class GlobalMetadataTransferException extends RuntimeException {
1144
+ static class RemoteStateTransferException extends RuntimeException {
1108
1145
1109
- public GlobalMetadataTransferException (String errorDesc ) {
1146
+ public RemoteStateTransferException (String errorDesc ) {
1110
1147
super (errorDesc );
1111
1148
}
1112
1149
1113
- public GlobalMetadataTransferException (String errorDesc , Throwable cause ) {
1150
+ public RemoteStateTransferException (String errorDesc , Throwable cause ) {
1114
1151
super (errorDesc , cause );
1115
1152
}
1116
1153
}
0 commit comments