Skip to content

Commit 4859e15

Browse files
authored
[Refactor] Remove CollectionUtils.sortAndDedup and use java TreeSet instead of hppc ObjectArrayList (#6884)
* [Refactor] CollectionUtils.sortAndDedup to use java lists HPPC ObjectArrayLists provide no benefit in CollectionUtils.sortAndDedup and adds an unnecessary library dependency. This commit removes that dependency on hppc by switching the sortAndDeup method to use java.util.Lists. BinaryFieldMapper logic is updated to use the generic java list. Signed-off-by: Nicholas Walter Knize <nknize@apache.org> * close BytesStreamOutput in CustomBinaryDocValuesField.bytes Signed-off-by: Nicholas Walter Knize <nknize@apache.org> --------- Signed-off-by: Nicholas Walter Knize <nknize@apache.org>
1 parent cdab7f2 commit 4859e15

File tree

3 files changed

+65
-69
lines changed

3 files changed

+65
-69
lines changed

server/src/main/java/org/opensearch/common/util/CollectionUtils.java

Lines changed: 22 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,10 @@
3232

3333
package org.opensearch.common.util;
3434

35-
import com.carrotsearch.hppc.ObjectArrayList;
3635
import org.apache.lucene.util.BytesRef;
3736
import org.apache.lucene.util.BytesRefArray;
3837
import org.apache.lucene.util.BytesRefBuilder;
3938
import org.apache.lucene.util.InPlaceMergeSorter;
40-
import org.apache.lucene.util.IntroSorter;
4139
import org.opensearch.common.Strings;
4240
import org.opensearch.common.collect.Iterators;
4341

@@ -50,7 +48,9 @@
5048
import java.util.Comparator;
5149
import java.util.HashMap;
5250
import java.util.IdentityHashMap;
51+
import java.util.Iterator;
5352
import java.util.List;
53+
import java.util.ListIterator;
5454
import java.util.Locale;
5555
import java.util.Map;
5656
import java.util.Objects;
@@ -95,59 +95,28 @@ public static <T> List<T> rotate(final List<T> list, int distance) {
9595
return new RotatedList<>(list, d);
9696
}
9797

98-
public static void sortAndDedup(final ObjectArrayList<byte[]> array) {
99-
int len = array.size();
100-
if (len > 1) {
101-
sort(array);
102-
int uniqueCount = 1;
103-
for (int i = 1; i < len; ++i) {
104-
if (!Arrays.equals(array.get(i), array.get(i - 1))) {
105-
array.set(uniqueCount++, array.get(i));
106-
}
107-
}
108-
array.elementsCount = uniqueCount;
98+
/**
99+
* in place de-duplicates items in a list
100+
*/
101+
public static <T> void sortAndDedup(final List<T> array, Comparator<T> comparator) {
102+
// base case: one item
103+
if (array.size() <= 1) {
104+
return;
109105
}
110-
}
111-
112-
public static void sort(final ObjectArrayList<byte[]> array) {
113-
new IntroSorter() {
114-
115-
byte[] pivot;
116-
117-
@Override
118-
protected void swap(int i, int j) {
119-
final byte[] tmp = array.get(i);
120-
array.set(i, array.get(j));
121-
array.set(j, tmp);
122-
}
123-
124-
@Override
125-
protected int compare(int i, int j) {
126-
return compare(array.get(i), array.get(j));
127-
}
128-
129-
@Override
130-
protected void setPivot(int i) {
131-
pivot = array.get(i);
132-
}
133-
134-
@Override
135-
protected int comparePivot(int j) {
136-
return compare(pivot, array.get(j));
106+
array.sort(comparator);
107+
ListIterator<T> deduped = array.listIterator();
108+
T cmp = deduped.next(); // return the first item and advance
109+
Iterator<T> oldArray = array.iterator();
110+
oldArray.next(); // advance to the old to the second item (advanced to third below)
111+
112+
do {
113+
T old = oldArray.next(); // get the next item and advance iter
114+
if (comparator.compare(cmp, old) != 0 && (cmp = deduped.next()) != old) {
115+
deduped.set(old);
137116
}
138-
139-
private int compare(byte[] left, byte[] right) {
140-
for (int i = 0, j = 0; i < left.length && j < right.length; i++, j++) {
141-
int a = left[i] & 0xFF;
142-
int b = right[j] & 0xFF;
143-
if (a != b) {
144-
return a - b;
145-
}
146-
}
147-
return left.length - right.length;
148-
}
149-
150-
}.sort(0, array.size());
117+
} while (oldArray.hasNext());
118+
// in place update
119+
array.subList(deduped.nextIndex(), array.size()).clear();
151120
}
152121

153122
public static int[] toArray(Collection<Integer> ints) {

server/src/main/java/org/opensearch/index/mapper/BinaryFieldMapper.java

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232

3333
package org.opensearch.index.mapper;
3434

35-
import com.carrotsearch.hppc.ObjectArrayList;
3635
import org.apache.lucene.document.StoredField;
3736
import org.apache.lucene.search.Query;
3837
import org.apache.lucene.util.BytesRef;
@@ -52,6 +51,7 @@
5251

5352
import java.io.IOException;
5453
import java.time.ZoneId;
54+
import java.util.ArrayList;
5555
import java.util.Arrays;
5656
import java.util.Base64;
5757
import java.util.Collections;
@@ -240,35 +240,34 @@ protected String contentType() {
240240
*/
241241
public static class CustomBinaryDocValuesField extends CustomDocValuesField {
242242

243-
private final ObjectArrayList<byte[]> bytesList;
244-
245-
private int totalSize = 0;
243+
private final ArrayList<byte[]> bytesList;
246244

247245
public CustomBinaryDocValuesField(String name, byte[] bytes) {
248246
super(name);
249-
bytesList = new ObjectArrayList<>();
247+
bytesList = new ArrayList<>();
250248
add(bytes);
251249
}
252250

253251
public void add(byte[] bytes) {
254252
bytesList.add(bytes);
255-
totalSize += bytes.length;
256253
}
257254

258255
@Override
259256
public BytesRef binaryValue() {
260257
try {
261-
CollectionUtils.sortAndDedup(bytesList);
262-
int size = bytesList.size();
263-
BytesStreamOutput out = new BytesStreamOutput(totalSize + (size + 1) * 5);
264-
out.writeVInt(size); // write total number of values
265-
for (int i = 0; i < size; i++) {
266-
final byte[] value = bytesList.get(i);
267-
int valueLength = value.length;
268-
out.writeVInt(valueLength);
269-
out.writeBytes(value, 0, valueLength);
258+
// sort and dedup in place
259+
CollectionUtils.sortAndDedup(bytesList, Arrays::compareUnsigned);
260+
int size = bytesList.stream().map(b -> b.length).reduce(0, Integer::sum);
261+
int length = bytesList.size();
262+
try (BytesStreamOutput out = new BytesStreamOutput(size + (length + 1) * 5)) {
263+
out.writeVInt(length); // write total number of values
264+
for (byte[] value : bytesList) {
265+
int valueLength = value.length;
266+
out.writeVInt(valueLength);
267+
out.writeBytes(value, 0, valueLength);
268+
}
269+
return out.bytes().toBytesRef();
270270
}
271-
return out.bytes().toBytesRef();
272271
} catch (IOException e) {
273272
throw new OpenSearchException("Failed to get binary value", e);
274273
}

server/src/test/java/org/opensearch/common/util/CollectionUtilsTests.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,11 @@
4141
import java.util.ArrayList;
4242
import java.util.Arrays;
4343
import java.util.Collections;
44+
import java.util.Comparator;
4445
import java.util.HashMap;
4546
import java.util.HashSet;
4647
import java.util.Iterator;
48+
import java.util.LinkedList;
4749
import java.util.List;
4850
import java.util.Map;
4951
import java.util.SortedSet;
@@ -85,6 +87,32 @@ public void testRotate() {
8587
}
8688
}
8789

90+
private <T> void assertDeduped(List<T> array, Comparator<T> cmp, int expectedLength) {
91+
// test the dedup w/ ArrayLists and LinkedLists
92+
List<List<T>> types = List.of(new ArrayList<T>(array), new LinkedList<>(array));
93+
for (List<T> clone : types) {
94+
// dedup the list
95+
CollectionUtils.sortAndDedup(clone, cmp);
96+
// verify unique elements
97+
for (int i = 0; i < clone.size() - 1; ++i) {
98+
assertNotEquals(cmp.compare(clone.get(i), clone.get(i + 1)), 0);
99+
}
100+
assertEquals(expectedLength, clone.size());
101+
}
102+
}
103+
104+
public void testSortAndDedup() {
105+
// test no elements in a string array
106+
assertDeduped(List.<String>of(), Comparator.naturalOrder(), 0);
107+
// test no elements in an integer array
108+
assertDeduped(List.<Integer>of(), Comparator.naturalOrder(), 0);
109+
// test unsorted array
110+
assertDeduped(List.of(-1, 0, 2, 1, -1, 19, -1), Comparator.naturalOrder(), 5);
111+
// test sorted array
112+
assertDeduped(List.of(-1, 0, 1, 2, 19, 19), Comparator.naturalOrder(), 5);
113+
// test sorted
114+
}
115+
88116
public void testSortAndDedupByteRefArray() {
89117
SortedSet<BytesRef> set = new TreeSet<>();
90118
final int numValues = scaledRandomIntBetween(0, 10000);

0 commit comments

Comments
 (0)