Skip to content

Commit c20ae47

Browse files
committed
initial commits
Signed-off-by: Sandeep Kumawat <2025sandeepkumawat@gmail.com>
1 parent 0282e64 commit c20ae47

File tree

5 files changed

+165
-11
lines changed

5 files changed

+165
-11
lines changed

server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ public class RemoteFsTranslog extends Translog {
9191
private static final int SYNC_PERMIT = 1;
9292
private final Semaphore syncPermit = new Semaphore(SYNC_PERMIT);
9393
private final AtomicBoolean pauseSync = new AtomicBoolean(false);
94+
boolean ckpAsMetadata;
9495

9596
public RemoteFsTranslog(
9697
TranslogConfig config,
@@ -110,14 +111,16 @@ public RemoteFsTranslog(
110111
this.startedPrimarySupplier = startedPrimarySupplier;
111112
this.remoteTranslogTransferTracker = remoteTranslogTransferTracker;
112113
fileTransferTracker = new FileTransferTracker(shardId, remoteTranslogTransferTracker);
114+
ckpAsMetadata = true;
113115
this.translogTransferManager = buildTranslogTransferManager(
114116
blobStoreRepository,
115117
threadPool,
116118
shardId,
117119
fileTransferTracker,
118120
remoteTranslogTransferTracker,
119121
indexSettings().getRemoteStorePathStrategy(),
120-
remoteStoreSettings
122+
remoteStoreSettings,
123+
ckpAsMetadata
121124
);
122125
try {
123126
download(translogTransferManager, location, logger);
@@ -288,7 +291,8 @@ public static TranslogTransferManager buildTranslogTransferManager(
288291
FileTransferTracker fileTransferTracker,
289292
RemoteTranslogTransferTracker tracker,
290293
RemoteStorePathStrategy pathStrategy,
291-
RemoteStoreSettings remoteStoreSettings
294+
RemoteStoreSettings remoteStoreSettings,
295+
boolean ckpAsMetadata
292296
) {
293297
assert Objects.nonNull(pathStrategy);
294298
String indexUUID = shardId.getIndex().getUUID();
@@ -310,7 +314,16 @@ public static TranslogTransferManager buildTranslogTransferManager(
310314
.build();
311315
BlobPath mdPath = pathStrategy.generatePath(mdPathInput);
312316
BlobStoreTransferService transferService = new BlobStoreTransferService(blobStoreRepository.blobStore(), threadPool);
313-
return new TranslogTransferManager(shardId, transferService, dataPath, mdPath, fileTransferTracker, tracker, remoteStoreSettings);
317+
return new TranslogTransferManager(
318+
shardId,
319+
transferService,
320+
dataPath,
321+
mdPath,
322+
fileTransferTracker,
323+
tracker,
324+
remoteStoreSettings,
325+
ckpAsMetadata
326+
);
314327
}
315328

316329
@Override

server/src/main/java/org/opensearch/index/translog/transfer/FileSnapshot.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.nio.file.Path;
2323
import java.nio.file.StandardOpenOption;
2424
import java.util.Arrays;
25+
import java.util.Map;
2526
import java.util.Objects;
2627

2728
/**
@@ -108,6 +109,7 @@ public static class TransferFileSnapshot extends FileSnapshot {
108109

109110
private final long primaryTerm;
110111
private Long checksum;
112+
private Map<String, String> metadata;
111113

112114
public TransferFileSnapshot(Path path, long primaryTerm, Long checksum) throws IOException {
113115
super(path);
@@ -128,6 +130,14 @@ public long getPrimaryTerm() {
128130
return primaryTerm;
129131
}
130132

133+
public void setMetadata(Map<String, String> metadata) {
134+
this.metadata = metadata;
135+
}
136+
137+
public Map<String, String> getMetadata() {
138+
return metadata;
139+
}
140+
131141
@Override
132142
public int hashCode() {
133143
return Objects.hash(primaryTerm, super.hashCode());

server/src/main/java/org/opensearch/index/translog/transfer/TransferSnapshot.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.opensearch.index.translog.transfer.FileSnapshot.TransferFileSnapshot;
1313
import org.opensearch.index.translog.transfer.FileSnapshot.TranslogFileSnapshot;
1414

15+
import java.io.IOException;
1516
import java.util.Set;
1617

1718
/**
@@ -39,4 +40,6 @@ public interface TransferSnapshot {
3940
* @return the translog transfer metadata
4041
*/
4142
TranslogTransferMetadata getTranslogTransferMetadata();
43+
44+
Set<TransferFileSnapshot> getTranslogFileWithMetadataSnapshots() throws IOException;
4245
}

server/src/main/java/org/opensearch/index/translog/transfer/TranslogCheckpointTransferSnapshot.java

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,15 @@
1313

1414
import java.io.Closeable;
1515
import java.io.IOException;
16+
import java.nio.file.Files;
1617
import java.nio.file.Path;
1718
import java.util.ArrayList;
19+
import java.util.Base64;
20+
import java.util.HashMap;
1821
import java.util.HashSet;
1922
import java.util.LinkedList;
2023
import java.util.List;
24+
import java.util.Map;
2125
import java.util.Set;
2226
import java.util.function.Function;
2327
import java.util.stream.Collectors;
@@ -64,6 +68,31 @@ public Set<TransferFileSnapshot> getTranslogFileSnapshots() {
6468
return translogCheckpointFileInfoTupleSet.stream().map(Tuple::v1).collect(Collectors.toSet());
6569
}
6670

71+
@Override
72+
public Set<TransferFileSnapshot> getTranslogFileWithMetadataSnapshots() throws IOException {
73+
Set<TransferFileSnapshot> toUploadList = new HashSet<>();
74+
for (Tuple<TranslogFileSnapshot, CheckpointFileSnapshot> tuple : translogCheckpointFileInfoTupleSet) {
75+
TranslogFileSnapshot translogFileSnapshot = tuple.v1();
76+
CheckpointFileSnapshot checkpointFileSnapshot = tuple.v2();
77+
translogFileSnapshot.setMetadata(buildMetadata(checkpointFileSnapshot.getPath()));
78+
toUploadList.add(translogFileSnapshot);
79+
}
80+
return toUploadList;
81+
}
82+
83+
public Map<String, String> buildMetadata(Path checkpointPath) throws IOException {
84+
Map<String, String> metadata = new HashMap<>();
85+
String ckpString = buildCheckpointDataAsBase64String(checkpointPath);
86+
metadata.put("ckp-data", ckpString);
87+
return metadata;
88+
}
89+
90+
static String buildCheckpointDataAsBase64String(Path checkpointFilePath) throws IOException {
91+
long fileSize = Files.size(checkpointFilePath);
92+
byte[] fileBytes = Files.readAllBytes(checkpointFilePath);
93+
return Base64.getEncoder().encodeToString(fileBytes);
94+
}
95+
6796
@Override
6897
public TranslogTransferMetadata getTranslogTransferMetadata() {
6998
return new TranslogTransferMetadata(

server/src/main/java/org/opensearch/index/translog/transfer/TranslogTransferManager.java

Lines changed: 107 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import org.opensearch.common.SetOnce;
1717
import org.opensearch.common.blobstore.BlobMetadata;
1818
import org.opensearch.common.blobstore.BlobPath;
19+
import org.opensearch.common.blobstore.FetchBlobResult;
1920
import org.opensearch.common.blobstore.stream.write.WritePriority;
2021
import org.opensearch.common.io.VersionedCodecStreamWrapper;
2122
import org.opensearch.common.io.stream.BytesStreamOutput;
@@ -36,6 +37,7 @@
3637
import java.nio.file.Files;
3738
import java.nio.file.Path;
3839
import java.util.ArrayList;
40+
import java.util.Base64;
3941
import java.util.HashMap;
4042
import java.util.HashSet;
4143
import java.util.List;
@@ -63,6 +65,7 @@ public class TranslogTransferManager {
6365
private final RemoteTranslogTransferTracker remoteTranslogTransferTracker;
6466
private final RemoteStoreSettings remoteStoreSettings;
6567
private static final int METADATA_FILES_TO_FETCH = 10;
68+
boolean ckpAsMetadata;
6669

6770
private final Logger logger;
6871

@@ -79,7 +82,8 @@ public TranslogTransferManager(
7982
BlobPath remoteMetadataTransferPath,
8083
FileTransferTracker fileTransferTracker,
8184
RemoteTranslogTransferTracker remoteTranslogTransferTracker,
82-
RemoteStoreSettings remoteStoreSettings
85+
RemoteStoreSettings remoteStoreSettings,
86+
boolean ckpAsMetadata
8387
) {
8488
this.shardId = shardId;
8589
this.transferService = transferService;
@@ -89,6 +93,7 @@ public TranslogTransferManager(
8993
this.logger = Loggers.getLogger(getClass(), shardId);
9094
this.remoteTranslogTransferTracker = remoteTranslogTransferTracker;
9195
this.remoteStoreSettings = remoteStoreSettings;
96+
this.ckpAsMetadata = ckpAsMetadata;
9297
}
9398

9499
public RemoteTranslogTransferTracker getRemoteTranslogTransferTracker() {
@@ -110,8 +115,12 @@ public boolean transferSnapshot(TransferSnapshot transferSnapshot, TranslogTrans
110115
long prevUploadTimeInMillis = remoteTranslogTransferTracker.getTotalUploadTimeInMillis();
111116

112117
try {
113-
toUpload.addAll(fileTransferTracker.exclusionFilter(transferSnapshot.getTranslogFileSnapshots()));
114-
toUpload.addAll(fileTransferTracker.exclusionFilter((transferSnapshot.getCheckpointFileSnapshots())));
118+
if (ckpAsMetadata) {
119+
toUpload.addAll(fileTransferTracker.exclusionFilter(transferSnapshot.getTranslogFileWithMetadataSnapshots()));
120+
} else {
121+
toUpload.addAll(fileTransferTracker.exclusionFilter(transferSnapshot.getTranslogFileSnapshots()));
122+
toUpload.addAll(fileTransferTracker.exclusionFilter((transferSnapshot.getCheckpointFileSnapshots())));
123+
}
115124
if (toUpload.isEmpty()) {
116125
logger.trace("Nothing to upload for transfer");
117126
return true;
@@ -236,15 +245,101 @@ public boolean downloadTranslog(String primaryTerm, String generation, Path loca
236245
generation,
237246
location
238247
);
239-
// Download Checkpoint file from remote to local FS
240248
String ckpFileName = Translog.getCommitCheckpointFileName(Long.parseLong(generation));
241-
downloadToFS(ckpFileName, location, primaryTerm);
242-
// Download translog file from remote to local FS
243249
String translogFilename = Translog.getFilename(Long.parseLong(generation));
244-
downloadToFS(translogFilename, location, primaryTerm);
250+
if (ckpAsMetadata == false) {
251+
// Download Checkpoint file from remote to local FS
252+
downloadToFS(ckpFileName, location, primaryTerm);
253+
// Download translog file from remote to local FS
254+
downloadToFS(translogFilename, location, primaryTerm);
255+
} else {
256+
// Download translog.tlog file with object metadata from remote to local FS
257+
Map<String, String> metadata = downloadTranslogToFSAndGetMetadata(translogFilename, location, primaryTerm, generation);
258+
try {
259+
assert metadata != null && !metadata.isEmpty() && metadata.containsKey("ckp-data");
260+
recoverCkpFileFromMetadata(metadata, location, generation, translogFilename);
261+
} catch (Exception e) {
262+
throw new IOException("Failed to recover checkpoint file from remote", e);
263+
}
264+
}
245265
return true;
246266
}
247267

268+
private Map<String, String> downloadTranslogToFSAndGetMetadata(String fileName, Path location, String primaryTerm, String generation)
269+
throws IOException {
270+
Path filePath = location.resolve(fileName);
271+
// Here, we always override the existing file if present.
272+
// We need to change this logic when we introduce incremental download
273+
deleteFileIfExists(filePath);
274+
275+
boolean downloadStatus = false;
276+
long bytesToRead = 0, downloadStartTime = System.nanoTime();
277+
Map<String, String> metadata;
278+
279+
FetchBlobResult inputStreamWithMetadata = transferService.downloadBlobWithMetadata(
280+
remoteDataTransferPath.add(primaryTerm),
281+
fileName
282+
);
283+
try {
284+
InputStream inputStream = inputStreamWithMetadata.getInputStream();
285+
metadata = inputStreamWithMetadata.getMetadata();
286+
287+
bytesToRead = inputStream.available();
288+
Files.copy(inputStream, filePath);
289+
downloadStatus = true;
290+
291+
} finally {
292+
remoteTranslogTransferTracker.addDownloadTimeInMillis((System.nanoTime() - downloadStartTime) / 1_000_000L);
293+
if (downloadStatus) {
294+
remoteTranslogTransferTracker.addDownloadBytesSucceeded(bytesToRead);
295+
}
296+
}
297+
298+
// Mark in FileTransferTracker so that the same files are not uploaded at the time of translog sync
299+
fileTransferTracker.add(fileName, true);
300+
301+
return metadata;
302+
}
303+
304+
/**
305+
* Process the provided metadata and tries to write the content of the checkpoint (ckp) file to the FS.
306+
*/
307+
private void recoverCkpFileFromMetadata(Map<String, String> metadata, Path location, String generation, String fileName)
308+
throws IOException {
309+
310+
boolean downloadStatus = false;
311+
long bytesToRead = 0;
312+
try {
313+
String ckpFileName = Translog.getCommitCheckpointFileName(Long.parseLong(generation));
314+
Path filePath = location.resolve(ckpFileName);
315+
// Here, we always override the existing file if present.
316+
deleteFileIfExists(filePath);
317+
318+
String ckpDataBase64 = metadata.get("ckp-data");
319+
if (ckpDataBase64 == null) {
320+
logger.error("Error processing metadata for translog file: {}", fileName);
321+
throw new IllegalStateException(
322+
"Checkpoint file data (key - ckp-data) is expected but not found in metadata for file: " + fileName
323+
);
324+
}
325+
byte[] ckpFileBytes = Base64.getDecoder().decode(ckpDataBase64);
326+
bytesToRead = ckpFileBytes.length;
327+
328+
Files.write(filePath, ckpFileBytes);
329+
downloadStatus = true;
330+
} finally {
331+
if (downloadStatus) {
332+
remoteTranslogTransferTracker.addDownloadBytesSucceeded(bytesToRead);
333+
}
334+
}
335+
}
336+
337+
public void deleteFileIfExists(Path filePath) throws IOException {
338+
if (Files.exists(filePath)) {
339+
Files.delete(filePath);
340+
}
341+
}
342+
248343
private void downloadToFS(String fileName, Path location, String primaryTerm) throws IOException {
249344
Path filePath = location.resolve(fileName);
250345
// Here, we always override the existing file if present.
@@ -391,7 +486,11 @@ public void deleteGenerationAsync(long primaryTerm, Set<Long> generations, Runna
391486
// Add .ckp and .tlog file to translog file list which is located in basePath/<primaryTerm>
392487
String ckpFileName = Translog.getCommitCheckpointFileName(generation);
393488
String translogFileName = Translog.getFilename(generation);
394-
translogFiles.addAll(List.of(ckpFileName, translogFileName));
489+
if (ckpAsMetadata == false) {
490+
translogFiles.addAll(List.of(ckpFileName, translogFileName));
491+
} else {
492+
translogFiles.add(translogFileName);
493+
}
395494
});
396495
// Delete the translog and checkpoint files asynchronously
397496
deleteTranslogFilesAsync(primaryTerm, translogFiles, onCompletion);

0 commit comments

Comments
 (0)