Skip to content

Add optimized path for intermediate values aggregator #131390

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -113,16 +113,16 @@ static void selfTest() {
@Param({ BYTES_REF, INT, LONG })
public String dataType;

private static Operator operator(DriverContext driverContext, int groups, String dataType) {
private static Operator operator(DriverContext driverContext, int groups, String dataType, AggregatorMode mode) {
if (groups == 1) {
return new AggregationOperator(
List.of(supplier(dataType).aggregatorFactory(AggregatorMode.SINGLE, List.of(0)).apply(driverContext)),
List.of(supplier(dataType).aggregatorFactory(mode, List.of(0)).apply(driverContext)),
driverContext
);
}
List<BlockHash.GroupSpec> groupSpec = List.of(new BlockHash.GroupSpec(0, ElementType.LONG));
return new HashAggregationOperator(
List.of(supplier(dataType).groupingAggregatorFactory(AggregatorMode.SINGLE, List.of(1))),
List.of(supplier(dataType).groupingAggregatorFactory(mode, List.of(1))),
() -> BlockHash.build(groupSpec, driverContext.blockFactory(), 16 * 1024, false),
driverContext
) {
Expand Down Expand Up @@ -177,6 +177,9 @@ private static void checkGrouped(String prefix, int groups, String dataType, Pag

// Check them
BytesRefBlock values = page.getBlock(1);
if (values.asOrdinals() == null) {
throw new AssertionError(" expected ordinals; but got " + values);
}
for (int p = 0; p < groups; p++) {
checkExpectedBytesRef(prefix, values, p, expected.get(p));
}
Expand Down Expand Up @@ -341,13 +344,21 @@ public void run() {

private static void run(int groups, String dataType, int opCount) {
DriverContext driverContext = driverContext();
try (Operator operator = operator(driverContext, groups, dataType)) {
Page page = page(groups, dataType);
for (int i = 0; i < opCount; i++) {
operator.addInput(page.shallowCopy());
try (Operator finalAggregator = operator(driverContext, groups, dataType, AggregatorMode.FINAL)) {
try (Operator initialAggregator = operator(driverContext, groups, dataType, AggregatorMode.INITIAL)) {
Page rawPage = page(groups, dataType);
for (int i = 0; i < opCount; i++) {
initialAggregator.addInput(rawPage.shallowCopy());
}
initialAggregator.finish();
Page intermediatePage = initialAggregator.getOutput();
for (int i = 0; i < opCount; i++) {
finalAggregator.addInput(intermediatePage.shallowCopy());
}
}
operator.finish();
checkExpected(groups, dataType, operator.getOutput());
finalAggregator.finish();
Page outputPage = finalAggregator.getOutput();
checkExpected(groups, dataType, outputPage);
}
}

Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/131390.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 131390
summary: Add optimized path for intermediate values aggregator
area: ES|QL
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
import static org.elasticsearch.compute.gen.Types.INTERMEDIATE_STATE_DESC;
import static org.elasticsearch.compute.gen.Types.INT_ARRAY_BLOCK;
import static org.elasticsearch.compute.gen.Types.INT_BIG_ARRAY_BLOCK;
import static org.elasticsearch.compute.gen.Types.INT_BLOCK;
import static org.elasticsearch.compute.gen.Types.INT_VECTOR;
import static org.elasticsearch.compute.gen.Types.LIST_AGG_FUNC_DESC;
import static org.elasticsearch.compute.gen.Types.LIST_INTEGER;
Expand Down Expand Up @@ -609,77 +610,98 @@ private MethodSpec addIntermediateInput(TypeName groupsType) {
.collect(joining(" && "))
);
}
if (intermediateState.stream().map(AggregatorImplementer.IntermediateStateDesc::elementType).anyMatch(n -> n.equals("BYTES_REF"))) {
builder.addStatement("$T scratch = new $T()", BYTES_REF, BYTES_REF);
}
builder.beginControlFlow("for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++)");
{
if (groupsIsBlock) {
builder.beginControlFlow("if (groups.isNull(groupPosition))");
builder.addStatement("continue");
builder.endControlFlow();
builder.addStatement("int groupStart = groups.getFirstValueIndex(groupPosition)");
builder.addStatement("int groupEnd = groupStart + groups.getValueCount(groupPosition)");
builder.beginControlFlow("for (int g = groupStart; g < groupEnd; g++)");
builder.addStatement("int groupId = groups.getInt(g)");
} else {
builder.addStatement("int groupId = groups.getInt(groupPosition)");
var bulkCombineIntermediateMethod = optionalStaticMethod(
declarationType,
requireVoidType(),
requireName("combineIntermediate"),
requireArgs(
Stream.concat(
// aggState, positionOffset, groupIds
Stream.of(aggState.declaredType(), TypeName.INT, groupsIsBlock ? INT_BLOCK : INT_VECTOR),
intermediateState.stream().map(AggregatorImplementer.IntermediateStateDesc::combineArgType)
).map(Methods::requireType).toArray(Methods.TypeMatcher[]::new)
)
);
if (bulkCombineIntermediateMethod != null) {
var states = intermediateState.stream()
.map(AggregatorImplementer.IntermediateStateDesc::name)
.collect(Collectors.joining(", "));
builder.addStatement("$T.combineIntermediate(state, positionOffset, groups, " + states + ")", declarationType);
} else {
if (intermediateState.stream()
.map(AggregatorImplementer.IntermediateStateDesc::elementType)
.anyMatch(n -> n.equals("BYTES_REF"))) {
builder.addStatement("$T scratch = new $T()", BYTES_REF, BYTES_REF);
}

if (aggState.declaredType().isPrimitive()) {
if (warnExceptions.isEmpty()) {
assert intermediateState.size() == 2;
assert intermediateState.get(1).name().equals("seen");
builder.beginControlFlow("if (seen.getBoolean(groupPosition + positionOffset))");
builder.beginControlFlow("for (int groupPosition = 0; groupPosition < groups.getPositionCount(); groupPosition++)");
{
if (groupsIsBlock) {
builder.beginControlFlow("if (groups.isNull(groupPosition))");
builder.addStatement("continue");
builder.endControlFlow();
builder.addStatement("int groupStart = groups.getFirstValueIndex(groupPosition)");
builder.addStatement("int groupEnd = groupStart + groups.getValueCount(groupPosition)");
builder.beginControlFlow("for (int g = groupStart; g < groupEnd; g++)");
builder.addStatement("int groupId = groups.getInt(g)");
} else {
assert intermediateState.size() == 3;
assert intermediateState.get(1).name().equals("seen");
assert intermediateState.get(2).name().equals("failed");
builder.beginControlFlow("if (failed.getBoolean(groupPosition + positionOffset))");
{
builder.addStatement("state.setFailed(groupId)");
}
builder.nextControlFlow("else if (seen.getBoolean(groupPosition + positionOffset))");
builder.addStatement("int groupId = groups.getInt(groupPosition)");
}

warningsBlock(builder, () -> {
var name = intermediateState.get(0).name();
var vectorAccessor = vectorAccessorName(intermediateState.get(0).elementType());
builder.addStatement(
"state.set(groupId, $T.combine(state.getOrDefault(groupId), $L.$L(groupPosition + positionOffset)))",
if (aggState.declaredType().isPrimitive()) {
if (warnExceptions.isEmpty()) {
assert intermediateState.size() == 2;
assert intermediateState.get(1).name().equals("seen");
builder.beginControlFlow("if (seen.getBoolean(groupPosition + positionOffset))");
} else {
assert intermediateState.size() == 3;
assert intermediateState.get(1).name().equals("seen");
assert intermediateState.get(2).name().equals("failed");
builder.beginControlFlow("if (failed.getBoolean(groupPosition + positionOffset))");
{
builder.addStatement("state.setFailed(groupId)");
}
builder.nextControlFlow("else if (seen.getBoolean(groupPosition + positionOffset))");
}

warningsBlock(builder, () -> {
var name = intermediateState.get(0).name();
var vectorAccessor = vectorAccessorName(intermediateState.get(0).elementType());
builder.addStatement(
"state.set(groupId, $T.combine(state.getOrDefault(groupId), $L.$L(groupPosition + positionOffset)))",
declarationType,
name,
vectorAccessor
);
});
builder.endControlFlow();
} else {
var stateHasBlock = intermediateState.stream().anyMatch(AggregatorImplementer.IntermediateStateDesc::block);
requireStaticMethod(
declarationType,
name,
vectorAccessor
requireVoidType(),
requireName("combineIntermediate"),
requireArgs(
Stream.of(
Stream.of(aggState.declaredType(), TypeName.INT), // aggState and groupId
intermediateState.stream().map(AggregatorImplementer.IntermediateStateDesc::combineArgType),
Stream.of(TypeName.INT).filter(p -> stateHasBlock) // position
).flatMap(Function.identity()).map(Methods::requireType).toArray(Methods.TypeMatcher[]::new)
)
);
});
builder.endControlFlow();
} else {
var stateHasBlock = intermediateState.stream().anyMatch(AggregatorImplementer.IntermediateStateDesc::block);
requireStaticMethod(
declarationType,
requireVoidType(),
requireName("combineIntermediate"),
requireArgs(
Stream.of(
Stream.of(aggState.declaredType(), TypeName.INT), // aggState and groupId
intermediateState.stream().map(AggregatorImplementer.IntermediateStateDesc::combineArgType),
Stream.of(TypeName.INT).filter(p -> stateHasBlock) // position
).flatMap(Function.identity()).map(Methods::requireType).toArray(Methods.TypeMatcher[]::new)
)
);

builder.addStatement(
"$T.combineIntermediate(state, groupId, "
+ intermediateState.stream().map(desc -> desc.access("groupPosition + positionOffset")).collect(joining(", "))
+ (stateHasBlock ? ", groupPosition + positionOffset" : "")
+ ")",
declarationType
);
}
if (groupsIsBlock) {
builder.addStatement(
"$T.combineIntermediate(state, groupId, "
+ intermediateState.stream().map(desc -> desc.access("groupPosition + positionOffset")).collect(joining(", "))
+ (stateHasBlock ? ", groupPosition + positionOffset" : "")
+ ")",
declarationType
);
}
if (groupsIsBlock) {
builder.endControlFlow();
}
builder.endControlFlow();
}
builder.endControlFlow();
}
return builder.build();
}
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading