16
16
import org .opensearch .common .SetOnce ;
17
17
import org .opensearch .common .blobstore .BlobMetadata ;
18
18
import org .opensearch .common .blobstore .BlobPath ;
19
+ import org .opensearch .common .blobstore .FetchBlobResult ;
19
20
import org .opensearch .common .blobstore .stream .write .WritePriority ;
20
21
import org .opensearch .common .io .VersionedCodecStreamWrapper ;
21
22
import org .opensearch .common .io .stream .BytesStreamOutput ;
36
37
import java .nio .file .Files ;
37
38
import java .nio .file .Path ;
38
39
import java .util .ArrayList ;
40
+ import java .util .Base64 ;
39
41
import java .util .HashMap ;
40
42
import java .util .HashSet ;
41
43
import java .util .List ;
@@ -63,6 +65,7 @@ public class TranslogTransferManager {
63
65
private final RemoteTranslogTransferTracker remoteTranslogTransferTracker ;
64
66
private final RemoteStoreSettings remoteStoreSettings ;
65
67
private static final int METADATA_FILES_TO_FETCH = 10 ;
68
+ boolean ckpAsMetadata ;
66
69
67
70
private final Logger logger ;
68
71
@@ -79,7 +82,8 @@ public TranslogTransferManager(
79
82
BlobPath remoteMetadataTransferPath ,
80
83
FileTransferTracker fileTransferTracker ,
81
84
RemoteTranslogTransferTracker remoteTranslogTransferTracker ,
82
- RemoteStoreSettings remoteStoreSettings
85
+ RemoteStoreSettings remoteStoreSettings ,
86
+ boolean ckpAsMetadata
83
87
) {
84
88
this .shardId = shardId ;
85
89
this .transferService = transferService ;
@@ -89,6 +93,7 @@ public TranslogTransferManager(
89
93
this .logger = Loggers .getLogger (getClass (), shardId );
90
94
this .remoteTranslogTransferTracker = remoteTranslogTransferTracker ;
91
95
this .remoteStoreSettings = remoteStoreSettings ;
96
+ this .ckpAsMetadata = ckpAsMetadata ;
92
97
}
93
98
94
99
public RemoteTranslogTransferTracker getRemoteTranslogTransferTracker () {
@@ -110,8 +115,12 @@ public boolean transferSnapshot(TransferSnapshot transferSnapshot, TranslogTrans
110
115
long prevUploadTimeInMillis = remoteTranslogTransferTracker .getTotalUploadTimeInMillis ();
111
116
112
117
try {
113
- toUpload .addAll (fileTransferTracker .exclusionFilter (transferSnapshot .getTranslogFileSnapshots ()));
114
- toUpload .addAll (fileTransferTracker .exclusionFilter ((transferSnapshot .getCheckpointFileSnapshots ())));
118
+ if (ckpAsMetadata ) {
119
+ toUpload .addAll (fileTransferTracker .exclusionFilter (transferSnapshot .getTranslogFileWithMetadataSnapshots ()));
120
+ } else {
121
+ toUpload .addAll (fileTransferTracker .exclusionFilter (transferSnapshot .getTranslogFileSnapshots ()));
122
+ toUpload .addAll (fileTransferTracker .exclusionFilter ((transferSnapshot .getCheckpointFileSnapshots ())));
123
+ }
115
124
if (toUpload .isEmpty ()) {
116
125
logger .trace ("Nothing to upload for transfer" );
117
126
return true ;
@@ -236,15 +245,101 @@ public boolean downloadTranslog(String primaryTerm, String generation, Path loca
236
245
generation ,
237
246
location
238
247
);
239
- // Download Checkpoint file from remote to local FS
240
248
String ckpFileName = Translog .getCommitCheckpointFileName (Long .parseLong (generation ));
241
- downloadToFS (ckpFileName , location , primaryTerm );
242
- // Download translog file from remote to local FS
243
249
String translogFilename = Translog .getFilename (Long .parseLong (generation ));
244
- downloadToFS (translogFilename , location , primaryTerm );
250
+ if (ckpAsMetadata == false ) {
251
+ // Download Checkpoint file from remote to local FS
252
+ downloadToFS (ckpFileName , location , primaryTerm );
253
+ // Download translog file from remote to local FS
254
+ downloadToFS (translogFilename , location , primaryTerm );
255
+ } else {
256
+ // Download translog.tlog file with object metadata from remote to local FS
257
+ Map <String , String > metadata = downloadTranslogToFSAndGetMetadata (translogFilename , location , primaryTerm , generation );
258
+ try {
259
+ assert metadata != null && !metadata .isEmpty () && metadata .containsKey ("ckp-data" );
260
+ recoverCkpFileFromMetadata (metadata , location , generation , translogFilename );
261
+ } catch (Exception e ) {
262
+ throw new IOException ("Failed to recover checkpoint file from remote" , e );
263
+ }
264
+ }
245
265
return true ;
246
266
}
247
267
268
+ private Map <String , String > downloadTranslogToFSAndGetMetadata (String fileName , Path location , String primaryTerm , String generation )
269
+ throws IOException {
270
+ Path filePath = location .resolve (fileName );
271
+ // Here, we always override the existing file if present.
272
+ // We need to change this logic when we introduce incremental download
273
+ deleteFileIfExists (filePath );
274
+
275
+ boolean downloadStatus = false ;
276
+ long bytesToRead = 0 , downloadStartTime = System .nanoTime ();
277
+ Map <String , String > metadata ;
278
+
279
+ FetchBlobResult inputStreamWithMetadata = transferService .downloadBlobWithMetadata (
280
+ remoteDataTransferPath .add (primaryTerm ),
281
+ fileName
282
+ );
283
+ try {
284
+ InputStream inputStream = inputStreamWithMetadata .getInputStream ();
285
+ metadata = inputStreamWithMetadata .getMetadata ();
286
+
287
+ bytesToRead = inputStream .available ();
288
+ Files .copy (inputStream , filePath );
289
+ downloadStatus = true ;
290
+
291
+ } finally {
292
+ remoteTranslogTransferTracker .addDownloadTimeInMillis ((System .nanoTime () - downloadStartTime ) / 1_000_000L );
293
+ if (downloadStatus ) {
294
+ remoteTranslogTransferTracker .addDownloadBytesSucceeded (bytesToRead );
295
+ }
296
+ }
297
+
298
+ // Mark in FileTransferTracker so that the same files are not uploaded at the time of translog sync
299
+ fileTransferTracker .add (fileName , true );
300
+
301
+ return metadata ;
302
+ }
303
+
304
+ /**
305
+ * Process the provided metadata and tries to write the content of the checkpoint (ckp) file to the FS.
306
+ */
307
+ private void recoverCkpFileFromMetadata (Map <String , String > metadata , Path location , String generation , String fileName )
308
+ throws IOException {
309
+
310
+ boolean downloadStatus = false ;
311
+ long bytesToRead = 0 ;
312
+ try {
313
+ String ckpFileName = Translog .getCommitCheckpointFileName (Long .parseLong (generation ));
314
+ Path filePath = location .resolve (ckpFileName );
315
+ // Here, we always override the existing file if present.
316
+ deleteFileIfExists (filePath );
317
+
318
+ String ckpDataBase64 = metadata .get ("ckp-data" );
319
+ if (ckpDataBase64 == null ) {
320
+ logger .error ("Error processing metadata for translog file: {}" , fileName );
321
+ throw new IllegalStateException (
322
+ "Checkpoint file data (key - ckp-data) is expected but not found in metadata for file: " + fileName
323
+ );
324
+ }
325
+ byte [] ckpFileBytes = Base64 .getDecoder ().decode (ckpDataBase64 );
326
+ bytesToRead = ckpFileBytes .length ;
327
+
328
+ Files .write (filePath , ckpFileBytes );
329
+ downloadStatus = true ;
330
+ } finally {
331
+ if (downloadStatus ) {
332
+ remoteTranslogTransferTracker .addDownloadBytesSucceeded (bytesToRead );
333
+ }
334
+ }
335
+ }
336
+
337
+ public void deleteFileIfExists (Path filePath ) throws IOException {
338
+ if (Files .exists (filePath )) {
339
+ Files .delete (filePath );
340
+ }
341
+ }
342
+
248
343
private void downloadToFS (String fileName , Path location , String primaryTerm ) throws IOException {
249
344
Path filePath = location .resolve (fileName );
250
345
// Here, we always override the existing file if present.
@@ -391,7 +486,11 @@ public void deleteGenerationAsync(long primaryTerm, Set<Long> generations, Runna
391
486
// Add .ckp and .tlog file to translog file list which is located in basePath/<primaryTerm>
392
487
String ckpFileName = Translog .getCommitCheckpointFileName (generation );
393
488
String translogFileName = Translog .getFilename (generation );
394
- translogFiles .addAll (List .of (ckpFileName , translogFileName ));
489
+ if (ckpAsMetadata == false ) {
490
+ translogFiles .addAll (List .of (ckpFileName , translogFileName ));
491
+ } else {
492
+ translogFiles .add (translogFileName );
493
+ }
395
494
});
396
495
// Delete the translog and checkpoint files asynchronously
397
496
deleteTranslogFilesAsync (primaryTerm , translogFiles , onCompletion );
0 commit comments