Skip to content

Commit f58d291

Browse files
authored
Fix empty VALUES with ordinals grouping (#130861)
We should not build the sorted structure for the ordinal grouping operator if the requested position is larger than maxGroupId. This situation occurs with nulls. We should benchmark the ordinal blocks and consider removing the ordinal grouping operator if performance is similar; otherwise, we need to integrate this operator with GroupingAggregatorFunctionTestCase. Relates #130576
1 parent a27784b commit f58d291

File tree

7 files changed

+114
-6
lines changed

7 files changed

+114
-6
lines changed

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/ValuesBytesRefAggregator.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/ValuesDoubleAggregator.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/ValuesFloatAggregator.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/ValuesIntAggregator.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/ValuesLongAggregator.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-ValuesAggregator.java.st

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,10 +129,10 @@ $endif$
129129
}
130130

131131
public static void combineStates(GroupingState current, int currentGroupId, GroupingState state, int statePosition) {
132-
var sorted = state.sortedForOrdinalMerging(current);
133132
if (statePosition > state.maxGroupId) {
134133
return;
135134
}
135+
var sorted = state.sortedForOrdinalMerging(current);
136136
var start = statePosition > 0 ? sorted.counts[statePosition - 1] : 0;
137137
var end = sorted.counts[statePosition];
138138
for (int i = start; i < end; i++) {

x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.apache.lucene.document.Field;
1212
import org.apache.lucene.document.LongField;
1313
import org.apache.lucene.document.LongPoint;
14+
import org.apache.lucene.document.SortedNumericDocValuesField;
1415
import org.apache.lucene.document.SortedSetDocValuesField;
1516
import org.apache.lucene.index.DirectoryReader;
1617
import org.apache.lucene.index.IndexReader;
@@ -35,6 +36,7 @@
3536
import org.elasticsearch.common.util.MockPageCacheRecycler;
3637
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
3738
import org.elasticsearch.compute.aggregation.CountAggregatorFunction;
39+
import org.elasticsearch.compute.aggregation.ValuesLongAggregatorFunctionSupplier;
3840
import org.elasticsearch.compute.aggregation.blockhash.BlockHash;
3941
import org.elasticsearch.compute.data.Block;
4042
import org.elasticsearch.compute.data.BlockFactory;
@@ -254,6 +256,112 @@ public String toString() {
254256
assertThat(blockFactory.breaker().getUsed(), equalTo(0L));
255257
}
256258

259+
// TODO: Remove ordinals grouping operator or enable it GroupingAggregatorFunctionTestCase
260+
public void testValuesWithOrdinalGrouping() throws Exception {
261+
DriverContext driverContext = driverContext();
262+
BlockFactory blockFactory = driverContext.blockFactory();
263+
264+
final int numDocs = between(100, 1000);
265+
Map<BytesRef, Set<Long>> expectedValues = new HashMap<>();
266+
try (BaseDirectoryWrapper dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
267+
String VAL_NAME = "val";
268+
String KEY_NAME = "key";
269+
for (int i = 0; i < numDocs; i++) {
270+
Document doc = new Document();
271+
BytesRef key = new BytesRef(Integer.toString(between(1, 100)));
272+
SortedSetDocValuesField keyField = new SortedSetDocValuesField(KEY_NAME, key);
273+
doc.add(keyField);
274+
if (randomBoolean()) {
275+
int numValues = between(0, 2);
276+
for (int v = 0; v < numValues; v++) {
277+
long val = between(1, 1000);
278+
var valuesField = new SortedNumericDocValuesField(VAL_NAME, val);
279+
doc.add(valuesField);
280+
expectedValues.computeIfAbsent(key, k -> new HashSet<>()).add(val);
281+
}
282+
}
283+
writer.addDocument(doc);
284+
}
285+
writer.commit();
286+
try (DirectoryReader reader = writer.getReader()) {
287+
List<Operator> operators = new ArrayList<>();
288+
if (randomBoolean()) {
289+
operators.add(new ShuffleDocsOperator(blockFactory));
290+
}
291+
operators.add(
292+
new ValuesSourceReaderOperator(
293+
blockFactory,
294+
List.of(
295+
new ValuesSourceReaderOperator.FieldInfo(
296+
VAL_NAME,
297+
ElementType.LONG,
298+
unused -> new BlockDocValuesReader.LongsBlockLoader(VAL_NAME)
299+
)
300+
),
301+
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> {
302+
throw new UnsupportedOperationException();
303+
}, 0.2)),
304+
0
305+
)
306+
);
307+
operators.add(
308+
new OrdinalsGroupingOperator(
309+
shardIdx -> new KeywordFieldMapper.KeywordFieldType(KEY_NAME).blockLoader(mockBlContext()),
310+
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> SourceLoader.FROM_STORED_SOURCE, 0.2)),
311+
ElementType.BYTES_REF,
312+
0,
313+
KEY_NAME,
314+
List.of(new ValuesLongAggregatorFunctionSupplier().groupingAggregatorFactory(INITIAL, List.of(1))),
315+
randomPageSize(),
316+
driverContext
317+
)
318+
);
319+
operators.add(
320+
new HashAggregationOperator(
321+
List.of(new ValuesLongAggregatorFunctionSupplier().groupingAggregatorFactory(FINAL, List.of(1))),
322+
() -> BlockHash.build(
323+
List.of(new BlockHash.GroupSpec(0, ElementType.BYTES_REF)),
324+
driverContext.blockFactory(),
325+
randomPageSize(),
326+
false
327+
),
328+
driverContext
329+
)
330+
);
331+
Map<BytesRef, Set<Long>> actualValues = new HashMap<>();
332+
Driver driver = TestDriverFactory.create(
333+
driverContext,
334+
luceneOperatorFactory(
335+
reader,
336+
List.of(new LuceneSliceQueue.QueryAndTags(new MatchAllDocsQuery(), List.of())),
337+
LuceneOperator.NO_LIMIT
338+
).get(driverContext),
339+
operators,
340+
new PageConsumerOperator(page -> {
341+
BytesRefBlock keyBlock = page.getBlock(0);
342+
LongBlock valueBlock = page.getBlock(1);
343+
BytesRef spare = new BytesRef();
344+
for (int p = 0; p < page.getPositionCount(); p++) {
345+
var key = keyBlock.getBytesRef(p, spare);
346+
int valueCount = valueBlock.getValueCount(p);
347+
for (int i = 0; i < valueCount; i++) {
348+
long val = valueBlock.getLong(valueBlock.getFirstValueIndex(p) + i);
349+
boolean added = actualValues.computeIfAbsent(BytesRef.deepCopyOf(key), k -> new HashSet<>()).add(val);
350+
assertTrue(actualValues.toString(), added);
351+
}
352+
}
353+
page.releaseBlocks();
354+
})
355+
);
356+
OperatorTestCase.runDriver(driver);
357+
assertDriverContext(driverContext);
358+
assertThat(actualValues, equalTo(expectedValues));
359+
org.elasticsearch.common.util.MockBigArrays.ensureAllArraysAreReleased();
360+
}
361+
}
362+
assertThat(blockFactory.breaker().getUsed(), equalTo(0L));
363+
}
364+
257365
public void testPushRoundToToQuery() throws IOException {
258366
long firstGroupMax = randomLong();
259367
long secondGroupMax = randomLong();

0 commit comments

Comments
 (0)