Skip to content

Commit b2ec3ef

Browse files
committed
Add exponential scaling FNV composite value hash algorithm for remote path
Signed-off-by: Ashish Singh <ssashish@amazon.com>
1 parent 645b1f1 commit b2ec3ef

File tree

9 files changed

+213
-45
lines changed

9 files changed

+213
-45
lines changed

server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -272,13 +272,13 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() {
272272
.get();
273273
assertEquals(RestStatus.ACCEPTED, restoreSnapshotResponse.status());
274274
ensureGreen(restoredIndexName1version2);
275-
validatePathType(restoredIndexName1version2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A);
275+
validatePathType(restoredIndexName1version2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64);
276276

277277
// Create index with cluster setting cluster.remote_store.index.path.prefix.type as hashed_prefix.
278278
indexSettings = getIndexSettings(1, 0).build();
279279
createIndex(indexName2, indexSettings);
280280
ensureGreen(indexName2);
281-
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A);
281+
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64);
282282

283283
// Validating that custom data has not changed for indexes which were created before the cluster setting got updated
284284
validatePathType(indexName1, PathType.FIXED);
@@ -309,7 +309,7 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() {
309309
ensureGreen(indexName2);
310310

311311
// Validating that custom data has not changed for testindex2 which was created before the cluster setting got updated
312-
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A);
312+
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_BASE64);
313313
}
314314

315315
private void validatePathType(String index, PathType pathType) {

server/src/main/java/org/opensearch/index/remote/RemoteStoreEnums.java

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
import static java.util.Collections.unmodifiableMap;
2424
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA;
2525
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA;
26+
import static org.opensearch.index.remote.RemoteStoreUtils.longToCompositeUrlBase64AndBinaryEncodingUsing20Bits;
27+
import static org.opensearch.index.remote.RemoteStoreUtils.longToUrlBase64;
2628

2729
/**
2830
* This class contains the different enums related to remote store like data categories and types, path types
@@ -216,13 +218,26 @@ public static PathType parseString(String pathType) {
216218
@PublicApi(since = "2.14.0")
217219
public enum PathHashAlgorithm {
218220

219-
FNV_1A(0) {
221+
FNV_1A_BASE64(0) {
220222
@Override
221223
String hash(PathInput pathInput) {
222224
String input = pathInput.indexUUID() + pathInput.shardId() + pathInput.dataCategory().getName() + pathInput.dataType()
223225
.getName();
224226
long hash = FNV1a.hash64(input);
225-
return RemoteStoreUtils.longToUrlBase64(hash);
227+
return longToUrlBase64(hash);
228+
}
229+
},
230+
/**
231+
* This hash algorithm will generate a hash value which will use the first 6 bits to create a base64 character and the next 14
232+
* bits to create a binary string.
233+
*/
234+
FNV_1A_COMPOSITE(1) {
235+
@Override
236+
String hash(PathInput pathInput) {
237+
String input = pathInput.indexUUID() + pathInput.shardId() + pathInput.dataCategory().getName() + pathInput.dataType()
238+
.getName();
239+
long hash = FNV1a.hash64(input);
240+
return longToCompositeUrlBase64AndBinaryEncodingUsing20Bits(hash);
226241
}
227242
};
228243

server/src/main/java/org/opensearch/index/remote/RemoteStorePathStrategyResolver.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public RemoteStorePathStrategy get() {
3939
// Min node version check ensures that we are enabling the new prefix type only when all the nodes understand it.
4040
pathType = Version.CURRENT.compareTo(minNodeVersionSupplier.get()) <= 0 ? type : PathType.FIXED;
4141
// If the path type is fixed, hash algorithm is not applicable.
42-
pathHashAlgorithm = pathType == PathType.FIXED ? null : PathHashAlgorithm.FNV_1A;
42+
pathHashAlgorithm = pathType == PathType.FIXED ? null : PathHashAlgorithm.FNV_1A_BASE64;
4343
return new RemoteStorePathStrategy(pathType, pathHashAlgorithm);
4444
}
4545

server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,14 @@
1010

1111
import org.opensearch.common.collect.Tuple;
1212

13+
import java.math.BigInteger;
1314
import java.nio.ByteBuffer;
1415
import java.util.Arrays;
1516
import java.util.Base64;
17+
import java.util.Collections;
1618
import java.util.HashMap;
1719
import java.util.List;
20+
import java.util.Locale;
1821
import java.util.Map;
1922
import java.util.function.Function;
2023

@@ -26,10 +29,26 @@
2629
public class RemoteStoreUtils {
2730
public static final int LONG_MAX_LENGTH = String.valueOf(Long.MAX_VALUE).length();
2831

32+
/**
33+
* URL safe base 64 character set. This must not be changed as this is used in deriving the base64 equivalent of binary.
34+
*/
35+
private static final char[] URL_BASE64_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".toCharArray();
36+
37+
private static Map<Character, Integer> BASE64_CHARSET_IDX_MAP;
38+
39+
static {
40+
Map<Character, Integer> charToIndexMap = new HashMap<>();
41+
for (int i = 0; i < URL_BASE64_CHARSET.length; i++) {
42+
charToIndexMap.put(URL_BASE64_CHARSET[i], i);
43+
}
44+
BASE64_CHARSET_IDX_MAP = Collections.unmodifiableMap(charToIndexMap);
45+
}
46+
2947
/**
3048
* This method subtracts given numbers from Long.MAX_VALUE and returns a string representation of the result.
3149
* The resultant string is guaranteed to be of the same length that of Long.MAX_VALUE. If shorter, we add left padding
3250
* of 0s to the string.
51+
*
3352
* @param num number to get the inverted long string for
3453
* @return String value of Long.MAX_VALUE - num
3554
*/
@@ -46,6 +65,7 @@ public static String invertLong(long num) {
4665

4766
/**
4867
* This method converts the given string into long and subtracts it from Long.MAX_VALUE
68+
*
4969
* @param str long in string format to be inverted
5070
* @return long value of the invert result
5171
*/
@@ -59,6 +79,7 @@ public static long invertLong(String str) {
5979

6080
/**
6181
* Extracts the segment name from the provided segment file name
82+
*
6283
* @param filename Segment file name to parse
6384
* @return Name of the segment that the segment file belongs to
6485
*/
@@ -79,10 +100,9 @@ public static String getSegmentName(String filename) {
79100
}
80101

81102
/**
82-
*
83103
* @param mdFiles List of segment/translog metadata files
84-
* @param fn Function to extract PrimaryTerm_Generation and Node Id from metadata file name .
85-
* fn returns null if node id is not part of the file name
104+
* @param fn Function to extract PrimaryTerm_Generation and Node Id from metadata file name .
105+
* fn returns null if node id is not part of the file name
86106
*/
87107
public static void verifyNoMultipleWriters(List<String> mdFiles, Function<String, Tuple<String, String>> fn) {
88108
Map<String, String> nodesByPrimaryTermAndGen = new HashMap<>();
@@ -116,4 +136,43 @@ static String longToUrlBase64(long value) {
116136
String base64Str = Base64.getUrlEncoder().encodeToString(hashBytes);
117137
return base64Str.substring(0, base64Str.length() - 1);
118138
}
139+
140+
static long urlBase64ToLong(String base64Str) {
141+
byte[] hashBytes = Base64.getUrlDecoder().decode(base64Str);
142+
return ByteBuffer.wrap(hashBytes).getLong();
143+
}
144+
145+
/**
146+
* Converts an input hash which occupies 64 bits of memory into a composite encoded string. The string will have 2 parts -
147+
* 1. Base 64 string and 2. Binary String. We will use the first 6 bits for creating the base 64 string.
148+
* For the second part, we will use the next 14 bits. For example - A01000101010001.
149+
*/
150+
static String longToCompositeUrlBase64AndBinaryEncodingUsing20Bits(long value) {
151+
return longToCompositeBase64AndBinaryEncoding(value, 20);
152+
}
153+
154+
/**
155+
* Converts an input hash which occupies 64 bits of memory into a composite encoded string. The string will have 2 parts -
156+
* 1. Base 64 string and 2. Binary String. We will use the first 6 bits for creating the base 64 string.
157+
* For the second part, the bits from position 6 up to (but excluding) position len, i.e. the next (len - 6) bits, will be used as is in string form.
158+
*/
159+
static String longToCompositeBase64AndBinaryEncoding(long value, int len) {
160+
if (len < 7 || len > 64) {
161+
throw new IllegalArgumentException("In longToCompositeBase64AndBinaryEncoding, len must be between 7 and 64 (both inclusive)");
162+
}
163+
String binaryEncoding = String.format(Locale.ROOT, "%64s", Long.toBinaryString(value)).replace(' ', '0');
164+
String base64Part = binaryEncoding.substring(0, 6);
165+
String binaryPart = binaryEncoding.substring(6, len);
166+
int base64DecimalValue = Integer.valueOf(base64Part, 2);
167+
assert base64DecimalValue >= 0 && base64DecimalValue < 64;
168+
return URL_BASE64_CHARSET[base64DecimalValue] + binaryPart;
169+
}
170+
171+
static long compositeUrlBase64BinaryEncodingToLong(String encodedValue) {
172+
char ch = encodedValue.charAt(0);
173+
int base64BitsIntValue = BASE64_CHARSET_IDX_MAP.get(ch);
174+
String base64PartBinary = Integer.toBinaryString(base64BitsIntValue);
175+
String binaryString = base64PartBinary + encodedValue.substring(1);
176+
return new BigInteger(binaryString, 2).longValue();
177+
}
119178
}

server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1607,7 +1607,7 @@ public void testRemoteCustomData() {
16071607
validateRemoteCustomData(
16081608
indexMetadata.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY),
16091609
PathHashAlgorithm.NAME,
1610-
PathHashAlgorithm.FNV_1A.name()
1610+
PathHashAlgorithm.FNV_1A_BASE64.name()
16111611
);
16121612
}
16131613

0 commit comments

Comments
 (0)