Skip to content

Support Nested Aggregations as part of Star-Tree #18048

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -990,7 +990,6 @@ private void constructNonStarNodes(InMemoryTreeNode node, int startDocId, int en
Long dimensionValue = getDimensionValue(i, dimensionId);
if (Objects.equals(dimensionValue, nodeDimensionValue) == false) {
addChildNode(node, i, dimensionId, nodeStartDocId, nodeDimensionValue);

nodeStartDocId = i;
nodeDimensionValue = dimensionValue;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo;

import java.io.IOException;
import java.util.List;

/**
* This interface is used to pre-compute the star tree bucket collector for each segment/leaf.
Expand All @@ -29,4 +30,18 @@ StarTreeBucketCollector getStarTreeBucketCollector(
CompositeIndexFieldInfo starTree,
StarTreeBucketCollector parentCollector
) throws IOException;

/**
* Returns the list of dimensions involved in this aggregation, which are required for
* merging dimension filters during StarTree precomputation. This is specifically needed
* for bucket aggregations to ensure that the correct dimensions are considered when
* constructing or merging filters during StarTree traversal.
* For metric aggregations, there is no need to specify dimensions since they operate
* purely on values within the buckets formed by parent bucket aggregations.
*
* @return List of dimension field names involved in the aggregation.
*/
default List<String> getDimensionFilters() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

StarTreePreComputeCollector is only involved in aggregations other than metric aggregators, so you can define the method here instead of repeating the same implementation everywhere. (Looking over the overrides, the implementations appear to be the same.)

return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
import org.opensearch.search.startree.filter.DimensionFilter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
Expand Down Expand Up @@ -283,29 +284,46 @@ private String fetchStarTreeCalendarUnit() {
return dimensionName;
}

/**
 * Gathers the star-tree dimensions that must be merged into the traversal filter for this
 * aggregation: the date dimension of this aggregator first, followed by whatever each
 * star-tree-capable sub-aggregator reports.
 *
 * @return a new mutable list of dimension names; a sub-aggregator that reports {@code null}
 *         contributes nothing
 */
@Override
public List<String> getDimensionFilters() {
    final List<String> mergedDimensions = new ArrayList<>();
    mergedDimensions.add(starTreeDateDimension);

    for (Aggregator child : subAggregators) {
        // Only sub-aggregators that take part in star-tree pre-computation expose dimensions.
        if (!(child instanceof StarTreePreComputeCollector preComputeChild)) {
            continue;
        }
        final List<String> nestedDimensions = preComputeChild.getDimensionFilters();
        if (nestedDimensions != null) {
            mergedDimensions.addAll(nestedDimensions);
        }
    }

    return mergedDimensions;
}

@Override
public StarTreeBucketCollector getStarTreeBucketCollector(
LeafReaderContext ctx,
CompositeIndexFieldInfo starTree,
StarTreeBucketCollector parentCollector
) throws IOException {
assert parentCollector == null;
StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree);
SortedNumericStarTreeValuesIterator valuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues
.getDimensionValuesIterator(starTreeDateDimension);
SortedNumericStarTreeValuesIterator docCountsIterator = StarTreeQueryHelper.getDocCountsIterator(starTreeValues, starTree);
List<String> dimensionsToMerge = getDimensionFilters();

return new StarTreeBucketCollector(
starTreeValues,
StarTreeTraversalUtil.getStarTreeResult(
starTreeValues,
StarTreeQueryHelper.mergeDimensionFilterIfNotExists(
context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(),
starTreeDateDimension,
List.of(DimensionFilter.MATCH_ALL_DEFAULT)
),
context
)
parent == null
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add some comments, please, on why it's done this way.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this block of code be extracted and reused?

? StarTreeTraversalUtil.getStarTreeResult(
starTreeValues,
StarTreeQueryHelper.mergeDimensionFilterIfNotExists(
context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(),
dimensionsToMerge,
List.of(DimensionFilter.MATCH_ALL_DEFAULT)
),
context
)
: null
) {
@Override
public void setSubCollectors() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -380,26 +381,44 @@ private void preComputeWithStarTree(LeafReaderContext ctx, CompositeIndexFieldIn
}
}

/**
 * Lists the dimensions this aggregation contributes to the star-tree filter merge.
 * The aggregator's own {@code fieldName} comes first; the dimensions of every sub-aggregator
 * that implements {@link StarTreePreComputeCollector} are appended in iteration order.
 *
 * @return a freshly allocated list of dimension names (never {@code null})
 */
@Override
public List<String> getDimensionFilters() {
    List<String> collected = new ArrayList<>();
    collected.add(fieldName);

    for (Aggregator nested : subAggregators) {
        if (nested instanceof StarTreePreComputeCollector starTreeChild) {
            List<String> fromChild = starTreeChild.getDimensionFilters();
            // A null result from a child is treated the same as an empty list.
            if (fromChild == null) {
                continue;
            }
            collected.addAll(fromChild);
        }
    }

    return collected;
}

@Override
public StarTreeBucketCollector getStarTreeBucketCollector(
LeafReaderContext ctx,
CompositeIndexFieldInfo starTree,
StarTreeBucketCollector parentCollector
) throws IOException {
assert parentCollector == null;
StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree);
List<String> dimensionsToMerge = getDimensionFilters();

// TODO: Evaluate optimizing StarTree traversal filter with specific ranges instead of MATCH_ALL_DEFAULT
return new StarTreeBucketCollector(
starTreeValues,
StarTreeTraversalUtil.getStarTreeResult(
starTreeValues,
StarTreeQueryHelper.mergeDimensionFilterIfNotExists(
context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(),
fieldName,
List.of(DimensionFilter.MATCH_ALL_DEFAULT)
),
context
)
parent == null
? StarTreeTraversalUtil.getStarTreeResult(
starTreeValues,
StarTreeQueryHelper.mergeDimensionFilterIfNotExists(
context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(),
dimensionsToMerge,
List.of(DimensionFilter.MATCH_ALL_DEFAULT)
),
context
)
: null
) {
@Override
public void setSubCollectors() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@
import org.opensearch.search.startree.filter.DimensionFilter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.BiConsumer;
Expand All @@ -103,11 +105,11 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
private final long valueCount;
protected final String fieldName;
private Weight weight;
protected final CollectionStrategy collectionStrategy;
protected CollectionStrategy collectionStrategy;
private final SetOnce<SortedSetDocValues> dvs = new SetOnce<>();
protected int segmentsWithSingleValuedOrds = 0;
protected int segmentsWithMultiValuedOrds = 0;
LongUnaryOperator globalOperator;
protected CardinalityUpperBound cardinalityUpperBound;

/**
* Lookup global ordinals
Expand Down Expand Up @@ -136,6 +138,7 @@ public GlobalOrdinalsStringTermsAggregator(
Map<String, Object> metadata
) throws IOException {
super(name, factories, context, parent, order, format, bucketCountThresholds, collectionMode, showTermDocCountError, metadata);
this.cardinalityUpperBound = cardinality;
this.resultStrategy = resultStrategy.apply(this); // ResultStrategy needs a reference to the Aggregator to do its job.
this.valuesSource = valuesSource;
final IndexReader reader = context.searcher().getIndexReader();
Expand Down Expand Up @@ -248,7 +251,6 @@ protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws
protected boolean tryStarTreePrecompute(LeafReaderContext ctx) throws IOException {
CompositeIndexFieldInfo supportedStarTree = StarTreeQueryHelper.getSupportedStarTree(this.context.getQueryShardContext());
if (supportedStarTree != null) {
globalOperator = valuesSource.globalOrdinalsMapping(ctx);
StarTreeBucketCollector starTreeBucketCollector = getStarTreeBucketCollector(ctx, supportedStarTree, null);
StarTreeQueryHelper.preComputeBucketsWithStarTree(starTreeBucketCollector);
return true;
Expand All @@ -260,7 +262,6 @@ protected boolean tryStarTreePrecompute(LeafReaderContext ctx) throws IOExceptio
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException {
SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx);
collectionStrategy.globalOrdsReady(globalOrds);

SortedDocValues singleValues = DocValues.unwrapSingleton(globalOrds);
if (singleValues != null) {
segmentsWithSingleValuedOrds++;
Expand Down Expand Up @@ -332,29 +333,57 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
});
}

/**
 * Builds the list of dimension names needed when merging dimension filters for star-tree
 * traversal: this aggregator's {@code fieldName}, then the dimensions reported by each
 * sub-aggregator that is itself a {@link StarTreePreComputeCollector}.
 *
 * @return a new list starting with {@code fieldName}; sub-aggregators returning {@code null}
 *         add no entries
 */
@Override
public List<String> getDimensionFilters() {
    final List<String> result = new ArrayList<>();
    result.add(fieldName);

    for (Aggregator subAggregator : subAggregators) {
        final boolean supportsStarTree = subAggregator instanceof StarTreePreComputeCollector;
        if (supportsStarTree == false) {
            continue;
        }
        final List<String> subDimensions = ((StarTreePreComputeCollector) subAggregator).getDimensionFilters();
        if (subDimensions != null) {
            result.addAll(subDimensions);
        }
    }

    return result;
}

public StarTreeBucketCollector getStarTreeBucketCollector(
LeafReaderContext ctx,
CompositeIndexFieldInfo starTree,
StarTreeBucketCollector parent
) throws IOException {
assert parent == null;
StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree);
SortedSetStarTreeValuesIterator valuesIterator = (SortedSetStarTreeValuesIterator) starTreeValues.getDimensionValuesIterator(
fieldName
);
SortedNumericStarTreeValuesIterator docCountsIterator = StarTreeQueryHelper.getDocCountsIterator(starTreeValues, starTree);
List<String> dimensionsToMerge = getDimensionFilters();

/* For nested aggregations, we require the RemapGlobalOrdsStarTree strategy to properly
handle global ordinal remapping. This check ensures we don't reinitialize the
collectionStrategy again if it's already correctly set. */
if (parent != null && !(collectionStrategy instanceof RemapGlobalOrdsStarTree)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I fixed the redundant initialization of globalOrds as part of change #18059 (pending merge).

Make sure that the changes don't break.

collectionStrategy.close();
collectionStrategy = new RemapGlobalOrdsStarTree(this.cardinalityUpperBound);
SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx);
collectionStrategy.globalOrdsReady(globalOrds);
}

LongUnaryOperator globalOperator = valuesSource.globalOrdinalsMapping(ctx);
return new StarTreeBucketCollector(
starTreeValues,
StarTreeTraversalUtil.getStarTreeResult(
starTreeValues,
StarTreeQueryHelper.mergeDimensionFilterIfNotExists(
context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(),
fieldName,
List.of(DimensionFilter.MATCH_ALL_DEFAULT)
),
context
)
parent == null
? StarTreeTraversalUtil.getStarTreeResult(
starTreeValues,
StarTreeQueryHelper.mergeDimensionFilterIfNotExists(
context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(),
dimensionsToMerge,
List.of(DimensionFilter.MATCH_ALL_DEFAULT)
),
context
)
: null
) {
@Override
public void setSubCollectors() throws IOException {
Expand All @@ -371,11 +400,14 @@ public void collectStarTreeEntry(int starTreeEntry, long owningBucketOrd) throws
for (int i = 0, count = valuesIterator.docValueCount(); i < count; i++) {
long dimensionValue = valuesIterator.value();
long ord = globalOperator.applyAsLong(dimensionValue);

if (docCountsIterator.advanceExact(starTreeEntry)) {
long metricValue = docCountsIterator.nextValue();
long bucketOrd = collectionStrategy.globalOrdToBucketOrd(0, ord);
collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry);
if (collectionStrategy instanceof RemapGlobalOrdsStarTree rangeSTGlobalOrds) {
rangeSTGlobalOrds.collectGlobalOrdsForStarTree(owningBucketOrd, starTreeEntry, ord, this, metricValue);
} else {
long bucketOrd = collectionStrategy.globalOrdToBucketOrd(owningBucketOrd, ord);
collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry);
}
}
}
}
Expand Down Expand Up @@ -708,7 +740,7 @@ public void close() {}
* less when collecting only a few.
*/
private class RemapGlobalOrds extends CollectionStrategy {
private final LongKeyedBucketOrds bucketOrds;
protected final LongKeyedBucketOrds bucketOrds;

private RemapGlobalOrds(CardinalityUpperBound cardinality) {
bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), cardinality);
Expand Down Expand Up @@ -784,6 +816,28 @@ public void close() {
}
}

/**
 * Variant of {@link RemapGlobalOrds} used when this aggregator collects star-tree entries as a
 * nested (child) aggregation. It registers {@code (owningBucketOrd, globalOrd)} pairs in the
 * inherited {@code bucketOrds} while star-tree entries are being collected.
 */
private class RemapGlobalOrdsStarTree extends RemapGlobalOrds {
    private RemapGlobalOrdsStarTree(CardinalityUpperBound cardinality) {
        super(cardinality);
    }

    @Override
    String describe() {
        return "remapStarTree";
    }

    /**
     * Maps a global ordinal seen in a star-tree entry to a bucket ordinal under the given owning
     * bucket and collects the entry's document count into that bucket.
     *
     * @param owningBucketOrd ordinal of the parent bucket that owns the new bucket
     * @param starTreeEntry   star-tree entry currently being collected
     * @param globalOrd       global ordinal of the term for this entry
     * @param collector       star-tree bucket collector the entry is collected through
     * @param docCount        document count carried by the star-tree entry
     * @throws IOException if collecting the bucket fails
     */
    void collectGlobalOrdsForStarTree(
        long owningBucketOrd,
        int starTreeEntry,
        long globalOrd,
        StarTreeBucketCollector collector,
        long docCount
    ) throws IOException {
        long bucketOrd = bucketOrds.add(owningBucketOrd, globalOrd);
        if (bucketOrd < 0) {
            // Per the LongKeyedBucketOrds#add contract, an already-registered pair is reported
            // as (-1 - existingOrd). Several star-tree entries can carry the same term, so the
            // value must be decoded before it is used as a bucket ordinal.
            bucketOrd = -1 - bucketOrd;
        }
        collectStarTreeBucket(collector, docCount, bucketOrd, starTreeEntry);
    }
}

/**
* Strategy for building results.
*/
Expand Down
Loading
Loading