Skip to content

Add optimized path for intermediate values aggregator #131390

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 21, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -113,16 +113,16 @@ static void selfTest() {
@Param({ BYTES_REF, INT, LONG })
public String dataType;

private static Operator operator(DriverContext driverContext, int groups, String dataType) {
private static Operator operator(DriverContext driverContext, int groups, String dataType, AggregatorMode mode) {
if (groups == 1) {
return new AggregationOperator(
List.of(supplier(dataType).aggregatorFactory(AggregatorMode.SINGLE, List.of(0)).apply(driverContext)),
List.of(supplier(dataType).aggregatorFactory(mode, List.of(0)).apply(driverContext)),
driverContext
);
}
List<BlockHash.GroupSpec> groupSpec = List.of(new BlockHash.GroupSpec(0, ElementType.LONG));
return new HashAggregationOperator(
List.of(supplier(dataType).groupingAggregatorFactory(AggregatorMode.SINGLE, List.of(1))),
List.of(supplier(dataType).groupingAggregatorFactory(mode, List.of(1))),
() -> BlockHash.build(groupSpec, driverContext.blockFactory(), 16 * 1024, false),
driverContext
) {
Expand Down Expand Up @@ -177,6 +177,9 @@ private static void checkGrouped(String prefix, int groups, String dataType, Pag

// Check them
BytesRefBlock values = page.getBlock(1);
if (values.asOrdinals() == null) {
throw new AssertionError(" expected ordinals; but got " + values);
}
for (int p = 0; p < groups; p++) {
checkExpectedBytesRef(prefix, values, p, expected.get(p));
}
Expand Down Expand Up @@ -341,13 +344,21 @@ public void run() {

private static void run(int groups, String dataType, int opCount) {
DriverContext driverContext = driverContext();
try (Operator operator = operator(driverContext, groups, dataType)) {
Page page = page(groups, dataType);
for (int i = 0; i < opCount; i++) {
operator.addInput(page.shallowCopy());
try (Operator finalAggregator = operator(driverContext, groups, dataType, AggregatorMode.FINAL)) {
try (Operator initialAggregator = operator(driverContext, groups, dataType, AggregatorMode.INITIAL)) {
Page rawPage = page(groups, dataType);
for (int i = 0; i < opCount; i++) {
initialAggregator.addInput(rawPage.shallowCopy());
}
initialAggregator.finish();
Page intermediatePage = initialAggregator.getOutput();
for (int i = 0; i < opCount; i++) {
finalAggregator.addInput(intermediatePage.shallowCopy());
}
}
operator.finish();
checkExpected(groups, dataType, operator.getOutput());
finalAggregator.finish();
Page outputPage = finalAggregator.getOutput();
checkExpected(groups, dataType, outputPage);
}
}

Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/131390.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 131390
summary: Add optimized path for intermediate values aggregator
area: ES|QL
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -608,63 +608,80 @@ private MethodSpec addIntermediateInput() {
.collect(joining(" && "))
);
}
if (intermediateState.stream().map(AggregatorImplementer.IntermediateStateDesc::elementType).anyMatch(n -> n.equals("BYTES_REF"))) {
builder.addStatement("$T scratch = new $T()", BYTES_REF, BYTES_REF);
}
builder.beginControlFlow("for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++)");
{
builder.addStatement("int groupId = groups.getInt(groupPosition)");
if (aggState.declaredType().isPrimitive()) {
if (warnExceptions.isEmpty()) {
assert intermediateState.size() == 2;
assert intermediateState.get(1).name().equals("seen");
builder.beginControlFlow("if (seen.getBoolean(groupPosition + positionOffset))");
} else {
assert intermediateState.size() == 3;
assert intermediateState.get(1).name().equals("seen");
assert intermediateState.get(2).name().equals("failed");
builder.beginControlFlow("if (failed.getBoolean(groupPosition + positionOffset))");
{
builder.addStatement("state.setFailed(groupId)");
var bulkCombineIntermediateMethod = optionalStaticMethod(
declarationType,
requireVoidType(),
requireName("combineIntermediate"),
requireArgs(
Stream.of(
Stream.of(aggState.declaredType(), TypeName.INT, INT_VECTOR), // aggState, positionOffset, groupIds
intermediateState.stream().map(AggregatorImplementer.IntermediateStateDesc::combineArgType)
).flatMap(Function.identity()).map(Methods::requireType).toArray(Methods.TypeMatcher[]::new)
)
);
if (bulkCombineIntermediateMethod != null) {
var states = intermediateState.stream().map(AggregatorImplementer.IntermediateStateDesc::name).collect(Collectors.joining(","));
builder.addStatement("$T.combineIntermediate(state, positionOffset, groups," + states + ")", declarationType);
} else {
if (intermediateState.stream()
.map(AggregatorImplementer.IntermediateStateDesc::elementType)
.anyMatch(n -> n.equals("BYTES_REF"))) {
builder.addStatement("$T scratch = new $T()", BYTES_REF, BYTES_REF);
}
builder.beginControlFlow("for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++)");
{
builder.addStatement("int groupId = groups.getInt(groupPosition)");
if (aggState.declaredType().isPrimitive()) {
if (warnExceptions.isEmpty()) {
assert intermediateState.size() == 2;
assert intermediateState.get(1).name().equals("seen");
builder.beginControlFlow("if (seen.getBoolean(groupPosition + positionOffset))");
} else {
assert intermediateState.size() == 3;
assert intermediateState.get(1).name().equals("seen");
assert intermediateState.get(2).name().equals("failed");
builder.beginControlFlow("if (failed.getBoolean(groupPosition + positionOffset))");
{
builder.addStatement("state.setFailed(groupId)");
}
builder.nextControlFlow("else if (seen.getBoolean(groupPosition + positionOffset))");
}
builder.nextControlFlow("else if (seen.getBoolean(groupPosition + positionOffset))");
}

warningsBlock(builder, () -> {
var name = intermediateState.get(0).name();
var vectorAccessor = vectorAccessorName(intermediateState.get(0).elementType());
builder.addStatement(
"state.set(groupId, $T.combine(state.getOrDefault(groupId), $L.$L(groupPosition + positionOffset)))",
warningsBlock(builder, () -> {
var name = intermediateState.get(0).name();
var vectorAccessor = vectorAccessorName(intermediateState.get(0).elementType());
builder.addStatement(
"state.set(groupId, $T.combine(state.getOrDefault(groupId), $L.$L(groupPosition + positionOffset)))",
declarationType,
name,
vectorAccessor
);
});
builder.endControlFlow();
} else {
var stateHasBlock = intermediateState.stream().anyMatch(AggregatorImplementer.IntermediateStateDesc::block);
requireStaticMethod(
declarationType,
name,
vectorAccessor
requireVoidType(),
requireName("combineIntermediate"),
requireArgs(
Stream.of(
Stream.of(aggState.declaredType(), TypeName.INT), // aggState and groupId
intermediateState.stream().map(AggregatorImplementer.IntermediateStateDesc::combineArgType),
Stream.of(TypeName.INT).filter(p -> stateHasBlock) // position
).flatMap(Function.identity()).map(Methods::requireType).toArray(Methods.TypeMatcher[]::new)
)
);
builder.addStatement(
"$T.combineIntermediate(state, groupId, "
+ intermediateState.stream().map(desc -> desc.access("groupPosition + positionOffset")).collect(joining(", "))
+ (stateHasBlock ? ", groupPosition + positionOffset" : "")
+ ")",
declarationType
);
});
}
builder.endControlFlow();
} else {
var stateHasBlock = intermediateState.stream().anyMatch(AggregatorImplementer.IntermediateStateDesc::block);
requireStaticMethod(
declarationType,
requireVoidType(),
requireName("combineIntermediate"),
requireArgs(
Stream.of(
Stream.of(aggState.declaredType(), TypeName.INT), // aggState and groupId
intermediateState.stream().map(AggregatorImplementer.IntermediateStateDesc::combineArgType),
Stream.of(TypeName.INT).filter(p -> stateHasBlock) // position
).flatMap(Function.identity()).map(Methods::requireType).toArray(Methods.TypeMatcher[]::new)
)
);

builder.addStatement(
"$T.combineIntermediate(state, groupId, "
+ intermediateState.stream().map(desc -> desc.access("groupPosition + positionOffset")).collect(joining(", "))
+ (stateHasBlock ? ", groupPosition + positionOffset" : "")
+ ")",
declarationType
);
}
builder.endControlFlow();
}
return builder.build();
}
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,7 @@ static GroupingAggregatorFunction.AddInput wrapAddInput(
if (valuesOrdinal == null) {
return delegate;
}
BytesRefVector dict = valuesOrdinal.getDictionaryVector();
final IntVector hashIds;
BytesRef spare = new BytesRef();
try (var hashIdsBuilder = values.blockFactory().newIntVectorFixedBuilder(dict.getPositionCount())) {
for (int p = 0; p < dict.getPositionCount(); p++) {
hashIdsBuilder.appendInt(Math.toIntExact(BlockHash.hashOrdToGroup(state.bytes.add(dict.getBytesRef(p, spare)))));
}
hashIds = hashIdsBuilder.build();
}
final IntVector hashIds = hashDict(state, valuesOrdinal.getDictionaryVector());
IntBlock ordinalIds = valuesOrdinal.getOrdinalsBlock();
return new GroupingAggregatorFunction.AddInput() {
@Override
Expand Down Expand Up @@ -85,17 +77,7 @@ public void add(int positionOffset, IntBigArrayBlock groupIds) {

@Override
public void add(int positionOffset, IntVector groupIds) {
for (int groupPosition = 0; groupPosition < groupIds.getPositionCount(); groupPosition++) {
int groupId = groupIds.getInt(groupPosition);
if (ordinalIds.isNull(groupPosition + positionOffset)) {
continue;
}
int valuesStart = ordinalIds.getFirstValueIndex(groupPosition + positionOffset);
int valuesEnd = valuesStart + ordinalIds.getValueCount(groupPosition + positionOffset);
for (int v = valuesStart; v < valuesEnd; v++) {
state.addValueOrdinal(groupId, hashIds.getInt(ordinalIds.getInt(v)));
}
}
addOrdinalInputBlock(state, positionOffset, groupIds, ordinalIds, hashIds);
}

@Override
Expand All @@ -114,15 +96,7 @@ static GroupingAggregatorFunction.AddInput wrapAddInput(
if (valuesOrdinal == null) {
return delegate;
}
BytesRefVector dict = valuesOrdinal.getDictionaryVector();
final IntVector hashIds;
BytesRef spare = new BytesRef();
try (var hashIdsBuilder = values.blockFactory().newIntVectorFixedBuilder(dict.getPositionCount())) {
for (int p = 0; p < dict.getPositionCount(); p++) {
hashIdsBuilder.appendInt(Math.toIntExact(BlockHash.hashOrdToGroup(state.bytes.add(dict.getBytesRef(p, spare)))));
}
hashIds = hashIdsBuilder.build();
}
final IntVector hashIds = hashDict(state, valuesOrdinal.getDictionaryVector());
var ordinalIds = valuesOrdinal.getOrdinalsVector();
return new GroupingAggregatorFunction.AddInput() {
@Override
Expand Down Expand Up @@ -157,10 +131,7 @@ public void add(int positionOffset, IntBigArrayBlock groupIds) {

@Override
public void add(int positionOffset, IntVector groupIds) {
for (int groupPosition = 0; groupPosition < groupIds.getPositionCount(); groupPosition++) {
int groupId = groupIds.getInt(groupPosition);
state.addValueOrdinal(groupId, hashIds.getInt(ordinalIds.getInt(groupPosition + positionOffset)));
}
addOrdinalInputVector(state, positionOffset, groupIds, ordinalIds, hashIds);
}

@Override
Expand All @@ -169,4 +140,86 @@ public void close() {
}
};
}

/**
 * Adds every entry of {@code dict} to {@code state.bytes} and returns, for each dictionary
 * position, the int id produced by {@link BlockHash#hashOrdToGroup} for that entry.
 * <p>
 * The result has exactly {@code dict.getPositionCount()} entries, in dictionary order, and is
 * allocated from {@code dict.blockFactory()}. Callers in this file release it with
 * try-with-resources or in {@code close()}.
 *
 * @param state grouping state whose {@code bytes} hash receives the dictionary entries
 * @param dict  dictionary vector of an ordinal-encoded bytes block
 * @return one id per dictionary position
 * @throws ArithmeticException if an id does not fit in an {@code int} (via {@link Math#toIntExact})
 */
static IntVector hashDict(ValuesBytesRefAggregator.GroupingState state, BytesRefVector dict) {
    final int dictSize = dict.getPositionCount();
    final BytesRef spare = new BytesRef();
    try (var builder = dict.blockFactory().newIntVectorFixedBuilder(dictSize)) {
        int position = 0;
        while (position < dictSize) {
            // Read the entry into the reusable scratch, add it to the hash, then normalize the ordinal.
            final BytesRef entry = dict.getBytesRef(position, spare);
            final long added = state.bytes.add(entry);
            final long id = BlockHash.hashOrdToGroup(added);
            builder.appendInt(Math.toIntExact(id));
            position++;
        }
        return builder.build();
    }
}

/**
 * Adds ordinal-encoded, possibly multi-valued input to {@code state} for one batch of group ids.
 * <p>
 * {@code groupIds} is indexed by the position within the batch ({@code 0..positionCount-1}),
 * while {@code ordinalIds} is indexed by that position plus {@code positionOffset}. This matches
 * {@link #addOrdinalInputVector} and the inline loop this helper replaced. Indexing
 * {@code groupIds} with the offset position would read the wrong group, or run past the end of
 * the vector, whenever {@code positionOffset} is non-zero.
 *
 * @param state          grouping state that collects the values
 * @param positionOffset offset of this batch's first position inside {@code ordinalIds}
 * @param groupIds       group id for each position of the batch
 * @param ordinalIds     dictionary ordinals of the values, one or more per position
 * @param hashIds        maps a dictionary ordinal to the id to record in {@code state}
 */
static void addOrdinalInputBlock(
    ValuesBytesRefAggregator.GroupingState state,
    int positionOffset,
    IntVector groupIds,
    IntBlock ordinalIds,
    IntVector hashIds
) {
    for (int p = 0; p < groupIds.getPositionCount(); p++) {
        // Group ids are batch-relative; only the values block is shifted by positionOffset.
        final int groupId = groupIds.getInt(p);
        final int valuePosition = p + positionOffset;
        // NOTE(review): the inline loop this replaced skipped null positions explicitly via isNull();
        // presumably getValueCount() is 0 for a null position so the inner loop is a no-op - confirm.
        final int start = ordinalIds.getFirstValueIndex(valuePosition);
        final int end = start + ordinalIds.getValueCount(valuePosition);
        for (int i = start; i < end; i++) {
            final int ord = ordinalIds.getInt(i);
            state.addValueOrdinal(groupId, hashIds.getInt(ord));
        }
    }
}

/**
 * Adds ordinal-encoded, single-valued input to {@code state} for one batch of group ids.
 * <p>
 * {@code groupIds} is read at the batch-relative position, {@code ordinalIds} at that position
 * plus {@code positionOffset}; the ordinal is translated through {@code hashIds} before being
 * recorded.
 *
 * @param state          grouping state that collects the values
 * @param positionOffset offset of this batch's first position inside {@code ordinalIds}
 * @param groupIds       group id for each position of the batch
 * @param ordinalIds     dictionary ordinal of the value at each position
 * @param hashIds        maps a dictionary ordinal to the id to record in {@code state}
 */
static void addOrdinalInputVector(
    ValuesBytesRefAggregator.GroupingState state,
    int positionOffset,
    IntVector groupIds,
    IntVector ordinalIds,
    IntVector hashIds
) {
    int position = 0;
    while (position < groupIds.getPositionCount()) {
        final int group = groupIds.getInt(position);
        final int dictOrdinal = ordinalIds.getInt(position + positionOffset);
        final int hashId = hashIds.getInt(dictOrdinal);
        state.addValueOrdinal(group, hashId);
        position++;
    }
}

static void combineIntermediateInputValues(
ValuesBytesRefAggregator.GroupingState state,
int positionOffset,
IntVector groupIds,
BytesRefBlock values
) {
BytesRefVector dict = null;
IntBlock ordinals = null;
{
final OrdinalBytesRefBlock asOrdinals = values.asOrdinals();
if (asOrdinals != null) {
dict = asOrdinals.getDictionaryVector();
ordinals = asOrdinals.getOrdinalsBlock();
}
}
if (dict != null && dict.getPositionCount() < groupIds.getPositionCount()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this use OrdinalBytesRefBlock.isDense(), or are the logics not related?

try (var hashIds = hashDict(state, dict)) {
IntVector ordinalsVector = ordinals.asVector();
if (ordinalsVector != null) {
addOrdinalInputVector(state, positionOffset, groupIds, ordinalsVector, hashIds);
} else {
addOrdinalInputBlock(state, positionOffset, groupIds, ordinals, hashIds);
}
}
} else {
final BytesRef scratch = new BytesRef();
for (int p = 0; p < groupIds.getPositionCount(); p++) {
final int valuePosition = p + positionOffset;
final int groupId = groupIds.getInt(valuePosition);
final int start = values.getFirstValueIndex(valuePosition);
final int end = start + values.getValueCount(valuePosition);
for (int i = start; i < end; i++) {
state.addValue(groupId, values.getBytesRef(i, scratch));
}
}
}
}
}
Loading
Loading