From b3bee6c481101f70cc3486f1ff70bde38268d2b3 Mon Sep 17 00:00:00 2001 From: Shailesh Singh Date: Wed, 23 Apr 2025 15:57:39 +0530 Subject: [PATCH 1/2] Support Nested Aggregations as part of Star-Tree --- .../startree/builder/BaseStarTreeBuilder.java | 1 - .../StarTreePreComputeCollector.java | 15 + .../histogram/DateHistogramAggregator.java | 38 +- .../bucket/range/RangeAggregator.java | 39 +- .../GlobalOrdinalsStringTermsAggregator.java | 90 +++- .../bucket/terms/NumericTermsAggregator.java | 43 +- .../search/startree/StarTreeQueryContext.java | 67 +-- .../search/startree/StarTreeQueryHelper.java | 8 +- .../startree/StarTreeTraversalUtil.java | 12 + .../startree/filter/DimensionFilter.java | 27 +- .../startree/filter/MatchAllFilter.java | 44 ++ .../search/SearchServiceStarTreeTests.java | 181 ++++++++ .../startree/StarTreeFilterTests.java | 8 +- .../StarTreeNestedAggregatorTests.java | 407 ++++++++++++++++++ .../aggregations/AggregatorTestCase.java | 22 +- 15 files changed, 883 insertions(+), 119 deletions(-) create mode 100644 server/src/main/java/org/opensearch/search/startree/filter/MatchAllFilter.java create mode 100644 server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeNestedAggregatorTests.java diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java index 935c490b5a4dc..e6e8332b0723f 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java @@ -990,7 +990,6 @@ private void constructNonStarNodes(InMemoryTreeNode node, int startDocId, int en Long dimensionValue = getDimensionValue(i, dimensionId); if (Objects.equals(dimensionValue, nodeDimensionValue) == false) { addChildNode(node, i, dimensionId, nodeStartDocId, nodeDimensionValue); - nodeStartDocId = i; nodeDimensionValue = dimensionValue; } diff --git a/server/src/main/java/org/opensearch/search/aggregations/StarTreePreComputeCollector.java b/server/src/main/java/org/opensearch/search/aggregations/StarTreePreComputeCollector.java index c2f2017997c4d..b56cd6f5cafe2 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/StarTreePreComputeCollector.java +++ b/server/src/main/java/org/opensearch/search/aggregations/StarTreePreComputeCollector.java @@ -12,6 +12,7 @@ import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; import java.io.IOException; +import java.util.List; /** * This interface is used to pre-compute the star tree bucket collector for each segment/leaf. @@ -29,4 +30,18 @@ StarTreeBucketCollector getStarTreeBucketCollector( CompositeIndexFieldInfo starTree, StarTreeBucketCollector parentCollector ) throws IOException; + + /** + * Returns the list of dimensions involved in this aggregation, which are required for + * merging dimension filters during StarTree precomputation. This is specifically needed + * for bucket aggregations to ensure that the correct dimensions are considered when + * constructing or merging filters during StarTree traversal. + * For metric aggregations, there is no need to specify dimensions since they operate + * purely on values within the buckets formed by parent bucket aggregations. + * + * @return List of dimension field names involved in the aggregation. + */ + default List getDimensionFilters() { + return null; + } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index c3cd4b40cf0d6..00ea6b3cfd8fb 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -69,6 +69,7 @@ import org.opensearch.search.startree.filter.DimensionFilter; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Locale; @@ -283,29 +284,46 @@ private String fetchStarTreeCalendarUnit() { return dimensionName; } + @Override + public List getDimensionFilters() { + List dimensionsToMerge = new ArrayList<>(); + dimensionsToMerge.add(starTreeDateDimension); + + for (Aggregator subAgg : subAggregators) { + if (subAgg instanceof StarTreePreComputeCollector collector) { + List childFilters = collector.getDimensionFilters(); + dimensionsToMerge.addAll(childFilters != null ? childFilters : Collections.emptyList()); + } + } + + return dimensionsToMerge; + } + @Override public StarTreeBucketCollector getStarTreeBucketCollector( LeafReaderContext ctx, CompositeIndexFieldInfo starTree, StarTreeBucketCollector parentCollector ) throws IOException { - assert parentCollector == null; StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree); SortedNumericStarTreeValuesIterator valuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues .getDimensionValuesIterator(starTreeDateDimension); SortedNumericStarTreeValuesIterator docCountsIterator = StarTreeQueryHelper.getDocCountsIterator(starTreeValues, starTree); + List dimensionsToMerge = getDimensionFilters(); return new StarTreeBucketCollector( starTreeValues, - StarTreeTraversalUtil.getStarTreeResult( - starTreeValues, - StarTreeQueryHelper.mergeDimensionFilterIfNotExists( - context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(), - starTreeDateDimension, - List.of(DimensionFilter.MATCH_ALL_DEFAULT) - ), - context - ) + parent == null + ? StarTreeTraversalUtil.getStarTreeResult( + starTreeValues, + StarTreeQueryHelper.mergeDimensionFilterIfNotExists( + context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(), + dimensionsToMerge, + List.of(DimensionFilter.MATCH_ALL_DEFAULT) + ), + context + ) + : null ) { @Override public void setSubCollectors() throws IOException { diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java index 02186a6a99079..0b79c529a2dd2 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java @@ -75,6 +75,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Objects; @@ -380,26 +381,44 @@ private void preComputeWithStarTree(LeafReaderContext ctx, CompositeIndexFieldIn } } + @Override + public List getDimensionFilters() { + List dimensionsToMerge = new ArrayList<>(); + dimensionsToMerge.add(fieldName); + + for (Aggregator subAgg : subAggregators) { + if (subAgg instanceof StarTreePreComputeCollector collector) { + List childFilters = collector.getDimensionFilters(); + dimensionsToMerge.addAll(childFilters != null ? childFilters : Collections.emptyList()); + } + } + + return dimensionsToMerge; + } + @Override public StarTreeBucketCollector getStarTreeBucketCollector( LeafReaderContext ctx, CompositeIndexFieldInfo starTree, StarTreeBucketCollector parentCollector ) throws IOException { - assert parentCollector == null; StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree); + List dimensionsToMerge = getDimensionFilters(); + // TODO: Evaluate optimizing StarTree traversal filter with specific ranges instead of MATCH_ALL_DEFAULT return new StarTreeBucketCollector( starTreeValues, - StarTreeTraversalUtil.getStarTreeResult( - starTreeValues, - StarTreeQueryHelper.mergeDimensionFilterIfNotExists( - context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(), - fieldName, - List.of(DimensionFilter.MATCH_ALL_DEFAULT) - ), - context - ) + parent == null + ? StarTreeTraversalUtil.getStarTreeResult( + starTreeValues, + StarTreeQueryHelper.mergeDimensionFilterIfNotExists( + context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(), + dimensionsToMerge, + List.of(DimensionFilter.MATCH_ALL_DEFAULT) + ), + context + ) + : null ) { @Override public void setSubCollectors() throws IOException { diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index d8ec9feaf44b4..1820740e43a74 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -79,7 +79,9 @@ import org.opensearch.search.startree.filter.DimensionFilter; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.function.BiConsumer; @@ -103,11 +105,11 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr private final long valueCount; protected final String fieldName; private Weight weight; - protected final CollectionStrategy collectionStrategy; + protected CollectionStrategy collectionStrategy; private final SetOnce dvs = new SetOnce<>(); protected int segmentsWithSingleValuedOrds = 0; protected int segmentsWithMultiValuedOrds = 0; - LongUnaryOperator globalOperator; + protected CardinalityUpperBound cardinalityUpperBound; /** * Lookup global ordinals @@ -136,6 +138,7 @@ public GlobalOrdinalsStringTermsAggregator( Map metadata ) throws IOException { super(name, factories, context, parent, order, format, bucketCountThresholds, collectionMode, showTermDocCountError, metadata); + this.cardinalityUpperBound = cardinality; this.resultStrategy = resultStrategy.apply(this); // ResultStrategy needs a reference to the Aggregator to do its job. this.valuesSource = valuesSource; final IndexReader reader = context.searcher().getIndexReader(); @@ -248,7 +251,6 @@ protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws protected boolean tryStarTreePrecompute(LeafReaderContext ctx) throws IOException { CompositeIndexFieldInfo supportedStarTree = StarTreeQueryHelper.getSupportedStarTree(this.context.getQueryShardContext()); if (supportedStarTree != null) { - globalOperator = valuesSource.globalOrdinalsMapping(ctx); StarTreeBucketCollector starTreeBucketCollector = getStarTreeBucketCollector(ctx, supportedStarTree, null); StarTreeQueryHelper.preComputeBucketsWithStarTree(starTreeBucketCollector); return true; @@ -260,7 +262,6 @@ protected boolean tryStarTreePrecompute(LeafReaderContext ctx) throws IOExceptio public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx); collectionStrategy.globalOrdsReady(globalOrds); - SortedDocValues singleValues = DocValues.unwrapSingleton(globalOrds); if (singleValues != null) { segmentsWithSingleValuedOrds++; @@ -332,29 +333,57 @@ public void collect(int doc, long owningBucketOrd) throws IOException { }); } + @Override + public List getDimensionFilters() { + List dimensionsToMerge = new ArrayList<>(); + dimensionsToMerge.add(fieldName); + + for (Aggregator subAgg : subAggregators) { + if (subAgg instanceof StarTreePreComputeCollector collector) { + List childFilters = collector.getDimensionFilters(); + dimensionsToMerge.addAll(childFilters != null ? childFilters : Collections.emptyList()); + } + } + + return dimensionsToMerge; + } + public StarTreeBucketCollector getStarTreeBucketCollector( LeafReaderContext ctx, CompositeIndexFieldInfo starTree, StarTreeBucketCollector parent ) throws IOException { - assert parent == null; StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree); SortedSetStarTreeValuesIterator valuesIterator = (SortedSetStarTreeValuesIterator) starTreeValues.getDimensionValuesIterator( fieldName ); SortedNumericStarTreeValuesIterator docCountsIterator = StarTreeQueryHelper.getDocCountsIterator(starTreeValues, starTree); + List dimensionsToMerge = getDimensionFilters(); + + /* For nested aggregations, we require the RemapGlobalOrdsStarTree strategy to properly + handle global ordinal remapping. This check ensures we don't reinitialize the + collectionStrategy again if it's already correctly set. */ + if (parent != null && !(collectionStrategy instanceof RemapGlobalOrdsStarTree)) { + collectionStrategy.close(); + collectionStrategy = new RemapGlobalOrdsStarTree(this.cardinalityUpperBound); + SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx); + collectionStrategy.globalOrdsReady(globalOrds); + } + LongUnaryOperator globalOperator = valuesSource.globalOrdinalsMapping(ctx); return new StarTreeBucketCollector( starTreeValues, - StarTreeTraversalUtil.getStarTreeResult( - starTreeValues, - StarTreeQueryHelper.mergeDimensionFilterIfNotExists( - context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(), - fieldName, - List.of(DimensionFilter.MATCH_ALL_DEFAULT) - ), - context - ) + parent == null + ? StarTreeTraversalUtil.getStarTreeResult( + starTreeValues, + StarTreeQueryHelper.mergeDimensionFilterIfNotExists( + context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(), + dimensionsToMerge, + List.of(DimensionFilter.MATCH_ALL_DEFAULT) + ), + context + ) + : null ) { @Override public void setSubCollectors() throws IOException { @@ -371,11 +400,14 @@ public void collectStarTreeEntry(int starTreeEntry, long owningBucketOrd) throws for (int i = 0, count = valuesIterator.docValueCount(); i < count; i++) { long dimensionValue = valuesIterator.value(); long ord = globalOperator.applyAsLong(dimensionValue); - if (docCountsIterator.advanceExact(starTreeEntry)) { long metricValue = docCountsIterator.nextValue(); - long bucketOrd = collectionStrategy.globalOrdToBucketOrd(0, ord); - collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); + if (collectionStrategy instanceof RemapGlobalOrdsStarTree rangeSTGlobalOrds) { + rangeSTGlobalOrds.collectGlobalOrdsForStarTree(owningBucketOrd, starTreeEntry, ord, this, metricValue); + } else { + long bucketOrd = collectionStrategy.globalOrdToBucketOrd(owningBucketOrd, ord); + collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); + } } } } @@ -708,7 +740,7 @@ public void close() {} * less when collecting only a few. */ private class RemapGlobalOrds extends CollectionStrategy { - private final LongKeyedBucketOrds bucketOrds; + protected final LongKeyedBucketOrds bucketOrds; private RemapGlobalOrds(CardinalityUpperBound cardinality) { bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), cardinality); @@ -784,6 +816,28 @@ public void close() { } } + private class RemapGlobalOrdsStarTree extends RemapGlobalOrds { + private RemapGlobalOrdsStarTree(CardinalityUpperBound cardinality) { + super(cardinality); + } + + @Override + String describe() { + return "remapStarTree"; + } + + void collectGlobalOrdsForStarTree( + long owningBucketOrd, + int starTreeEntry, + long globalOrd, + StarTreeBucketCollector collector, + long docCount + ) throws IOException { + long bucketOrd = bucketOrds.add(owningBucketOrd, globalOrd); + collectStarTreeBucket(collector, docCount, bucketOrd, starTreeEntry); + } + } + /** * Strategy for building results. */ diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/NumericTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/NumericTermsAggregator.java index bcdea9fb4af3c..25dfd08484a0a 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/NumericTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/NumericTermsAggregator.java @@ -72,7 +72,9 @@ import java.io.IOException; import java.math.BigInteger; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -136,7 +138,6 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol public void collect(int doc, long owningBucketOrd) throws IOException { if (values.advanceExact(doc)) { int valuesCount = values.docValueCount(); - long previous = Long.MAX_VALUE; for (int i = 0; i < valuesCount; ++i) { long val = values.nextValue(); @@ -169,28 +170,44 @@ protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws return false; } + @Override + public List getDimensionFilters() { + List dimensionsToMerge = new ArrayList<>(); + dimensionsToMerge.add(fieldName); + + for (Aggregator subAgg : subAggregators) { + if (subAgg instanceof StarTreePreComputeCollector collector) { + List childFilters = collector.getDimensionFilters(); + dimensionsToMerge.addAll(childFilters != null ? childFilters : Collections.emptyList()); + } + } + + return dimensionsToMerge; + } + public StarTreeBucketCollector getStarTreeBucketCollector( LeafReaderContext ctx, CompositeIndexFieldInfo starTree, StarTreeBucketCollector parent ) throws IOException { - assert parent == null; StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree); SortedNumericStarTreeValuesIterator valuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues .getDimensionValuesIterator(fieldName); SortedNumericStarTreeValuesIterator docCountsIterator = StarTreeQueryHelper.getDocCountsIterator(starTreeValues, starTree); - + List dimensionsToMerge = getDimensionFilters(); return new StarTreeBucketCollector( starTreeValues, - StarTreeTraversalUtil.getStarTreeResult( - starTreeValues, - StarTreeQueryHelper.mergeDimensionFilterIfNotExists( - context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(), - fieldName, - List.of(DimensionFilter.MATCH_ALL_DEFAULT) - ), - context - ) + parent == null + ? StarTreeTraversalUtil.getStarTreeResult( + starTreeValues, + StarTreeQueryHelper.mergeDimensionFilterIfNotExists( + context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(), + dimensionsToMerge, + List.of(DimensionFilter.MATCH_ALL_DEFAULT) + ), + context + ) + : null ) { @Override public void setSubCollectors() throws IOException { @@ -215,7 +232,6 @@ public void collectStarTreeEntry(int starTreeEntry, long owningBucketOrd) throws } for (int i = 0, count = valuesIterator.entryValueCount(); i < count; i++) { - if (docCountsIterator.advanceExact(starTreeEntry)) { long metricValue = docCountsIterator.nextValue(); long bucketOrd = bucketOrds.add(owningBucketOrd, dimensionValue); @@ -300,7 +316,6 @@ private InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws } buildSubAggs(topBucketsPerOrd); - InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length]; for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { result[ordIdx] = buildResult(owningBucketOrds[ordIdx], otherDocCounts[ordIdx], topBucketsPerOrd[ordIdx]); diff --git a/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java b/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java index 53a5a7e007417..9e6315f3d5a13 100644 --- a/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java +++ b/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java @@ -107,23 +107,7 @@ public void maybeSetCachedNodeIdsForSegment(int key, FixedBitSet values) { public boolean consolidateAllFilters(SearchContext context) { // Validate the fields and metrics required by aggregations are supported in star tree for (AggregatorFactory aggregatorFactory : context.aggregations().factories().getFactories()) { - // first check for aggregation is a metric aggregation - if (validateStarTreeMetricSupport(compositeMappedFieldType, aggregatorFactory)) { - continue; - } - - // if not a metric aggregation, check for applicable date histogram shape - if (validateDateHistogramSupport(compositeMappedFieldType, aggregatorFactory)) { - continue; - } - - // validation for terms aggregation - if (validateKeywordTermsAggregationSupport(compositeMappedFieldType, aggregatorFactory)) { - continue; - } - - // validation for range aggregation - if (validateRangeAggregationSupport(compositeMappedFieldType, aggregatorFactory)) { + if (validateNestedAggregationStructure(compositeMappedFieldType, aggregatorFactory)) { continue; } // invalid query shape @@ -181,12 +165,6 @@ private static boolean validateKeywordTermsAggregationSupport( return false; } - // Validate all sub-factories - for (AggregatorFactory subFactory : aggregatorFactory.getSubFactories().getFactories()) { - if (!validateStarTreeMetricSupport(compositeIndexFieldInfo, subFactory)) { - return false; - } - } return true; } @@ -208,12 +186,6 @@ private static boolean validateRangeAggregationSupport( return false; } - // Validate all sub-factories - for (AggregatorFactory subFactory : aggregatorFactory.getSubFactories().getFactories()) { - if (!validateStarTreeMetricSupport(compositeIndexFieldInfo, subFactory)) { - return false; - } - } return true; } @@ -270,12 +242,45 @@ private static boolean validateDateHistogramSupport( return false; } - // Validate all sub-factories + return true; + } + + private static boolean validateNestedAggregationStructure( + CompositeDataCubeFieldType compositeIndexFieldInfo, + AggregatorFactory aggregatorFactory + ) { + boolean isValid; + + switch (aggregatorFactory) { + case TermsAggregatorFactory termsAggregatorFactory -> isValid = validateKeywordTermsAggregationSupport( + compositeIndexFieldInfo, + termsAggregatorFactory + ); + case DateHistogramAggregatorFactory dateHistogramAggregatorFactory -> isValid = validateDateHistogramSupport( + compositeIndexFieldInfo, + dateHistogramAggregatorFactory + ); + case RangeAggregatorFactory rangeAggregatorFactory -> isValid = validateRangeAggregationSupport( + compositeIndexFieldInfo, + rangeAggregatorFactory + ); + case MetricAggregatorFactory metricAggregatorFactory -> { + isValid = validateStarTreeMetricSupport(compositeIndexFieldInfo, metricAggregatorFactory); + return isValid && metricAggregatorFactory.getSubFactories().getFactories().length == 0; + } + case null, default -> { + return false; + } + } + + if (!isValid) return false; + for (AggregatorFactory subFactory : aggregatorFactory.getSubFactories().getFactories()) { - if (!validateStarTreeMetricSupport(compositeIndexFieldInfo, subFactory)) { + if (!validateNestedAggregationStructure(compositeIndexFieldInfo, subFactory)) { return false; } } + return true; } diff --git a/server/src/main/java/org/opensearch/search/startree/StarTreeQueryHelper.java b/server/src/main/java/org/opensearch/search/startree/StarTreeQueryHelper.java index 68a613a373edf..82619634dc881 100644 --- a/server/src/main/java/org/opensearch/search/startree/StarTreeQueryHelper.java +++ b/server/src/main/java/org/opensearch/search/startree/StarTreeQueryHelper.java @@ -212,16 +212,16 @@ public static void preComputeBucketsWithStarTree(StarTreeBucketCollector starTre public static StarTreeFilter mergeDimensionFilterIfNotExists( StarTreeFilter baseStarTreeFilter, - String dimensionToMerge, + List dimensionsToMerge, List dimensionFiltersToMerge ) { Map> dimensionFilterMap = new HashMap<>(baseStarTreeFilter.getDimensions().size()); for (String baseDimension : baseStarTreeFilter.getDimensions()) { dimensionFilterMap.put(baseDimension, baseStarTreeFilter.getFiltersForDimension(baseDimension)); } - // Don't add groupBy when already present in base filter. - if (!dimensionFilterMap.containsKey(dimensionToMerge)) { - dimensionFilterMap.put(dimensionToMerge, dimensionFiltersToMerge); + + for (String dimension : dimensionsToMerge) { + dimensionFilterMap.putIfAbsent(dimension, dimensionFiltersToMerge); } return new StarTreeFilter(dimensionFilterMap); } diff --git a/server/src/main/java/org/opensearch/search/startree/StarTreeTraversalUtil.java b/server/src/main/java/org/opensearch/search/startree/StarTreeTraversalUtil.java index cf9c125e84b79..804b3d332b586 100644 --- a/server/src/main/java/org/opensearch/search/startree/StarTreeTraversalUtil.java +++ b/server/src/main/java/org/opensearch/search/startree/StarTreeTraversalUtil.java @@ -21,6 +21,7 @@ import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.StarTreeValuesIterator; import org.opensearch.search.internal.SearchContext; import org.opensearch.search.startree.filter.DimensionFilter; +import org.opensearch.search.startree.filter.MatchAllFilter; import org.opensearch.search.startree.filter.StarTreeFilter; import java.io.IOException; @@ -91,6 +92,17 @@ public static FixedBitSet getStarTreeResult(StarTreeValues starTreeValues, StarT // Clear the temporary bit set before reuse tempBitSet.clear(0, starTreeResult.maxMatchedDoc + 1); + // Skip filtering if a MatchAllFilter is present for this dimension, since it implies all values match and no further filtering + // is needed + boolean isMatchAllFilterPresent = false; + for (DimensionFilter dimensionFilter : dimensionFilters) { + if (dimensionFilter instanceof MatchAllFilter) { + isMatchAllFilterPresent = true; + break; + } + } + if (isMatchAllFilterPresent) continue; + if (bitSet.length() > 0) { // Iterate over the current set of matched document IDs for (int entryId = bitSet.nextSetBit(0); entryId != DocIdSetIterator.NO_MORE_DOCS; entryId = (entryId + 1 < bitSet.length()) diff --git a/server/src/main/java/org/opensearch/search/startree/filter/DimensionFilter.java b/server/src/main/java/org/opensearch/search/startree/filter/DimensionFilter.java index 64f971a58f216..acf89a3b0285a 100644 --- a/server/src/main/java/org/opensearch/search/startree/filter/DimensionFilter.java +++ b/server/src/main/java/org/opensearch/search/startree/filter/DimensionFilter.java @@ -11,12 +11,10 @@ import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; -import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; import org.opensearch.search.internal.SearchContext; import org.opensearch.search.startree.StarTreeNodeCollector; import java.io.IOException; -import java.util.Iterator; /** * Contains the logic to filter over a dimension either in StarTree Index or it's Dimension DocValues @@ -24,30 +22,7 @@ @ExperimentalApi public interface DimensionFilter { - DimensionFilter MATCH_ALL_DEFAULT = new DimensionFilter() { - @Override - public void initialiseForSegment(StarTreeValues starTreeValues, SearchContext searchContext) throws IOException { - - } - - @Override - public void matchStarTreeNodes(StarTreeNode parentNode, StarTreeValues starTreeValues, StarTreeNodeCollector collector) - throws IOException { - if (parentNode != null) { - for (Iterator it = parentNode.getChildrenIterator(); it.hasNext();) { - StarTreeNode starTreeNode = it.next(); - if (starTreeNode.getStarTreeNodeType() == StarTreeNodeType.DEFAULT.getValue()) { - collector.collectStarTreeNode(starTreeNode); - } - } - } - } - - @Override - public boolean matchDimValue(long ordinal, StarTreeValues starTreeValues) { - return true; - } - }; + DimensionFilter MATCH_ALL_DEFAULT = new MatchAllFilter(); /** * Converts parsed user values to ordinals based on segment and other init actions can be performed. diff --git a/server/src/main/java/org/opensearch/search/startree/filter/MatchAllFilter.java b/server/src/main/java/org/opensearch/search/startree/filter/MatchAllFilter.java new file mode 100644 index 0000000000000..72391041e10dc --- /dev/null +++ b/server/src/main/java/org/opensearch/search/startree/filter/MatchAllFilter.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.startree.filter; + +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNode; +import org.opensearch.index.compositeindex.datacube.startree.node.StarTreeNodeType; +import org.opensearch.search.internal.SearchContext; +import org.opensearch.search.startree.StarTreeNodeCollector; + +import java.io.IOException; +import java.util.Iterator; + +public class MatchAllFilter implements DimensionFilter { + @Override + public void initialiseForSegment(StarTreeValues starTreeValues, SearchContext searchContext) throws IOException { + + } + + @Override + public void matchStarTreeNodes(StarTreeNode parentNode, StarTreeValues starTreeValues, StarTreeNodeCollector collector) + throws IOException { + if (parentNode != null) { + for (Iterator it = parentNode.getChildrenIterator(); it.hasNext();) { + StarTreeNode starTreeNode = it.next(); + if (starTreeNode.getStarTreeNodeType() == StarTreeNodeType.DEFAULT.getValue() + || starTreeNode.getStarTreeNodeType() == StarTreeNodeType.NULL.getValue()) { + collector.collectStarTreeNode(starTreeNode); + } + } + } + } + + @Override + public boolean matchDimValue(long ordinal, StarTreeValues starTreeValues) { + return true; + } +} diff --git a/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java b/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java index c0583cd5227b2..a378ac8fd2de3 100644 --- a/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java +++ b/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java @@ -15,6 +15,7 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.Rounding; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.common.Strings; import org.opensearch.index.IndexService; import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; @@ -41,6 +42,7 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.IndicesService; import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorFactories; import org.opensearch.search.aggregations.AggregatorFactory; import org.opensearch.search.aggregations.SearchContextAggregations; @@ -64,14 +66,18 @@ import org.opensearch.test.OpenSearchSingleNodeTestCase; import java.io.IOException; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Set; +import java.util.function.Supplier; import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX; +import static org.opensearch.search.aggregations.AggregationBuilders.count; import static org.opensearch.search.aggregations.AggregationBuilders.dateHistogram; import static org.opensearch.search.aggregations.AggregationBuilders.max; import static org.opensearch.search.aggregations.AggregationBuilders.medianAbsoluteDeviation; +import static org.opensearch.search.aggregations.AggregationBuilders.min; import static org.opensearch.search.aggregations.AggregationBuilders.range; import static org.opensearch.search.aggregations.AggregationBuilders.sum; import static org.opensearch.search.aggregations.AggregationBuilders.terms; @@ -241,6 +247,135 @@ public void testQueryParsingForMetricAggregations() throws IOException { searchContext.close(); } + public void testStarTreeNestedAggregations() throws IOException { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.STAR_TREE_INDEX, true).build()); + setStarTreeIndexSetting("true"); + + Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .put(IndexMetadata.INDEX_APPEND_ONLY_ENABLED_SETTING.getKey(), true) + .build(); + CreateIndexRequestBuilder builder = client().admin() + .indices() + .prepareCreate("test") + .setSettings(settings) + .setMapping(NumericTermsAggregatorTests.getExpandedMapping(1, false)); + + createIndex("test", builder); + + IndicesService indicesService = getInstanceFromNode(IndicesService.class); + IndexService indexService = indicesService.indexServiceSafe(resolveIndex("test")); + IndexShard indexShard = indexService.getShard(0); + ShardSearchRequest request = new ShardSearchRequest( + OriginalIndices.NONE, + new SearchRequest().allowPartialSearchResults(true), + indexShard.shardId(), + 1, + new AliasFilter(null, Strings.EMPTY_ARRAY), + 1.0f, + -1, + null, + null + ); + + QueryBuilder baseQuery; + SearchContext searchContext = createSearchContext(indexService); + StarTreeFieldConfiguration starTreeFieldConfiguration = new StarTreeFieldConfiguration( + 1, + Collections.emptySet(), + StarTreeFieldConfiguration.StarTreeBuildMode.ON_HEAP + ); + + List>> aggregationSuppliers = getAggregationSuppliers(); + + ValuesSourceAggregationBuilder[] aggBuilders = { + sum("_sum").field(FIELD_NAME), + max("_max").field(FIELD_NAME), + min("_min").field(FIELD_NAME), + count("_count").field(FIELD_NAME), }; + + // 3-LEVELS [BUCKET -> BUCKET -> METRIC] + for (Supplier> firstSupplier : aggregationSuppliers) { + for (Supplier> secondSupplier : aggregationSuppliers) { + for (ValuesSourceAggregationBuilder metricAgg : aggBuilders) { + + ValuesSourceAggregationBuilder secondBucket = secondSupplier.get().subAggregation(metricAgg); + ValuesSourceAggregationBuilder firstBucket = firstSupplier.get().subAggregation(secondBucket); + + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().size(0) + .query(new MatchAllQueryBuilder()) + .aggregation(firstBucket); + + MetricStat stat = getMetricStatFromAgg(metricAgg); + List metrics = List.of(new Metric(FIELD_NAME, List.of(stat))); + + assertStarTreeContext( + request, + sourceBuilder, + getStarTreeQueryContext( + searchContext, + starTreeFieldConfiguration, + "startree1", + -1, + getDimensions(aggregationSuppliers.indexOf(firstSupplier), aggregationSuppliers.indexOf(secondSupplier)), + metrics, + new MatchAllQueryBuilder(), + sourceBuilder, + true + ), + -1 + ); + } + } + } + + // 4-LEVELS [BUCKET -> BUCKET -> BUCKET -> METRIC] + for (Supplier> firstSupplier : aggregationSuppliers) { + for (Supplier> secondSupplier : aggregationSuppliers) { + for (Supplier> thirdSupplier : aggregationSuppliers) { + for (ValuesSourceAggregationBuilder metricAgg : aggBuilders) { + + ValuesSourceAggregationBuilder thirdBucket = thirdSupplier.get().subAggregation(metricAgg); + ValuesSourceAggregationBuilder secondBucket = secondSupplier.get().subAggregation(thirdBucket); + ValuesSourceAggregationBuilder firstBucket = firstSupplier.get().subAggregation(secondBucket); + + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().size(0) + .query(new MatchAllQueryBuilder()) + .aggregation(firstBucket); + + MetricStat stat = getMetricStatFromAgg(metricAgg); + List metrics = List.of(new Metric(FIELD_NAME, List.of(stat))); + + assertStarTreeContext( + request, + sourceBuilder, + getStarTreeQueryContext( + searchContext, + starTreeFieldConfiguration, + "startree1", + -1, + getDimensions( + aggregationSuppliers.indexOf(firstSupplier), + aggregationSuppliers.indexOf(secondSupplier), + aggregationSuppliers.indexOf(thirdSupplier) + ), + metrics, + new MatchAllQueryBuilder(), + sourceBuilder, + true + ), + -1 + ); + } + } + } + } + + setStarTreeIndexSetting(null); + } + /** * Test query parsing for date histogram aggregations, with/without numeric term query */ @@ -906,4 +1041,50 @@ private StarTreeQueryContext getStarTreeQueryContext( } return starTreeQueryContext; } + + private static List getDimensions(int... indices) { + return Arrays.stream(indices).mapToObj(SearchServiceStarTreeTests::getDimensionByIndex).toList(); + } + + private static List>> getAggregationSuppliers() { + String TIMESTAMP_FIELD = "timestamp"; + String KEYWORD_FIELD = "clientip"; + String SIZE = "size"; + String STATUS = "status"; + + return List.of( + () -> terms("term_size").field(SIZE), + () -> terms("term_status").field(STATUS), + () -> dateHistogram("by_day").field(TIMESTAMP_FIELD).calendarInterval(DateHistogramInterval.DAY), + () -> range("range").field(STATUS).addRange(0, 10), + () -> terms("term_keyword").field(KEYWORD_FIELD).collectMode(Aggregator.SubAggCollectionMode.BREADTH_FIRST) + ); + } + + private static Dimension getDimensionByIndex(int index) { + String TIMESTAMP_FIELD = "timestamp"; + String KEYWORD_FIELD = "clientip"; + String SIZE = "size"; + String STATUS = "status"; + + return switch (index) { + case 0 -> new NumericDimension(SIZE); + case 2 -> new DateDimension( + TIMESTAMP_FIELD, + List.of(new DateTimeUnitAdapter(Rounding.DateTimeUnit.DAY_OF_MONTH)), + DateFieldMapper.Resolution.MILLISECONDS + ); + case 4 -> new OrdinalDimension(KEYWORD_FIELD); + default -> new NumericDimension(STATUS); + }; + } + + private MetricStat getMetricStatFromAgg(ValuesSourceAggregationBuilder agg) { + String name = agg.getName(); + if (name.contains("sum")) return MetricStat.SUM; + else if (name.contains("max")) return MetricStat.MAX; + else if (name.contains("min")) return MetricStat.MIN; + else if (name.contains("count")) return MetricStat.VALUE_COUNT; + throw new IllegalArgumentException("Unknown metric aggregation: " + name); + } } diff --git a/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeFilterTests.java b/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeFilterTests.java index cd2943f23be7a..7def41891bfab 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeFilterTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeFilterTests.java @@ -120,7 +120,7 @@ public void testStarTreeFilterMerging() { StarTreeFilter starTreeFilter = new StarTreeFilter(Collections.emptyMap()); mergedStarTreeFilter = StarTreeQueryHelper.mergeDimensionFilterIfNotExists( starTreeFilter, - dimensionToMerge, + List.of(dimensionToMerge), List.of(exactMatchDimFilter) ); assertEquals(1, mergedStarTreeFilter.getDimensions().size()); @@ -131,7 +131,7 @@ public void testStarTreeFilterMerging() { starTreeFilter = new StarTreeFilter(Map.of(dimensionToMerge, List.of(rangeMatchDimFilter))); mergedStarTreeFilter = StarTreeQueryHelper.mergeDimensionFilterIfNotExists( starTreeFilter, - dimensionToMerge, + List.of(dimensionToMerge), List.of(exactMatchDimFilter) ); assertEquals(1, mergedStarTreeFilter.getDimensions().size()); @@ -142,7 +142,7 @@ public void testStarTreeFilterMerging() { starTreeFilter = new StarTreeFilter(Map.of(dimensionToMerge, List.of(rangeMatchDimFilter), "status", List.of(rangeMatchDimFilter))); mergedStarTreeFilter = StarTreeQueryHelper.mergeDimensionFilterIfNotExists( starTreeFilter, - dimensionToMerge, + List.of(dimensionToMerge), List.of(exactMatchDimFilter) ); assertEquals(2, mergedStarTreeFilter.getDimensions().size()); @@ -155,7 +155,7 @@ public void testStarTreeFilterMerging() { starTreeFilter = new StarTreeFilter(Map.of("status", List.of(rangeMatchDimFilter))); mergedStarTreeFilter = StarTreeQueryHelper.mergeDimensionFilterIfNotExists( starTreeFilter, - dimensionToMerge, + List.of(dimensionToMerge), List.of(exactMatchDimFilter) ); assertEquals(2, mergedStarTreeFilter.getDimensions().size()); diff --git a/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeNestedAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeNestedAggregatorTests.java new file mode 100644 index 0000000000000..e73778c32e166 --- /dev/null +++ b/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeNestedAggregatorTests.java @@ -0,0 +1,407 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.aggregations.startree; + +import com.carrotsearch.randomizedtesting.RandomizedTest; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.BytesRef; +import org.opensearch.common.Rounding; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; +import org.opensearch.index.codec.composite.CompositeIndexReader; +import org.opensearch.index.codec.composite.composite101.Composite101Codec; +import org.opensearch.index.codec.composite912.datacube.startree.StarTreeDocValuesFormatTests; +import org.opensearch.index.compositeindex.datacube.DateDimension; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.compositeindex.datacube.OrdinalDimension; +import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitAdapter; +import org.opensearch.index.mapper.DateFieldMapper; +import org.opensearch.index.mapper.KeywordFieldMapper; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.search.aggregations.Aggregator; +import org.opensearch.search.aggregations.InternalAggregation; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregatorTestCase; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; +import org.opensearch.search.aggregations.bucket.histogram.InternalDateHistogram; +import org.opensearch.search.aggregations.bucket.range.InternalRange; +import org.opensearch.search.aggregations.bucket.range.RangeAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.InternalTerms; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Random; +import java.util.function.Function; +import java.util.function.Supplier; + +import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX; +import static org.opensearch.search.aggregations.AggregationBuilders.count; +import static org.opensearch.search.aggregations.AggregationBuilders.dateHistogram; +import static org.opensearch.search.aggregations.AggregationBuilders.max; +import static org.opensearch.search.aggregations.AggregationBuilders.min; +import static org.opensearch.search.aggregations.AggregationBuilders.range; +import static org.opensearch.search.aggregations.AggregationBuilders.sum; +import static org.opensearch.search.aggregations.AggregationBuilders.terms; +import static org.opensearch.test.InternalAggregationTestCase.DEFAULT_MAX_BUCKETS; + +public class StarTreeNestedAggregatorTests extends DateHistogramAggregatorTestCase { + private static final String TIMESTAMP_FIELD = "@timestamp"; + private static final MappedFieldType TIMESTAMP_FIELD_TYPE = new DateFieldMapper.DateFieldType(TIMESTAMP_FIELD); + + private static final String KEYWORD_FIELD = "clientip"; + MappedFieldType KEYWORD_FIELD_TYPE = new KeywordFieldMapper.KeywordFieldType(KEYWORD_FIELD); + + final static String STATUS = "status"; + final static String SIZE = "size"; + private static final MappedFieldType STATUS_FIELD_TYPE = new NumberFieldMapper.NumberFieldType( + STATUS, + NumberFieldMapper.NumberType.LONG + ); + private static final MappedFieldType SIZE_FIELD_TYPE = new NumberFieldMapper.NumberFieldType(SIZE, NumberFieldMapper.NumberType.LONG); + private static FeatureFlags.TestUtils.FlagWriteLock fflock = null; + + @Before + public void setup() { + fflock = new FeatureFlags.TestUtils.FlagWriteLock(STAR_TREE_INDEX); + // FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.STAR_TREE_INDEX, true).build()); + } + + @After + public void teardown() throws IOException { + fflock.close(); + // FeatureFlags.initializeFeatureFlags(Settings.EMPTY); + } + + protected Codec getCodec() { + final Logger testLogger = LogManager.getLogger(MetricAggregatorTests.class); + MapperService mapperService; + try { + mapperService = StarTreeDocValuesFormatTests.createMapperService(NumericTermsAggregatorTests.getExpandedMapping(1, false)); + } catch (IOException e) { + throw new RuntimeException(e); + } + return new Composite101Codec(Lucene101Codec.Mode.BEST_SPEED, mapperService, testLogger); + } + + public void testStarTreeNestedAggregations() throws IOException { + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(null); + conf.setCodec(getCodec()); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + + Random random = RandomizedTest.getRandom(); + int totalDocs = 100; + + long val; + long date; + List docs = new ArrayList<>(); + // Index 100 random documents + for (int i = 0; i < totalDocs; i++) { + Document doc = new Document(); + if (randomBoolean()) { + val = random.nextInt(100); // Random int between 0 and 99 for status + doc.add(new SortedNumericDocValuesField(STATUS, val)); + } + if (randomBoolean()) { + val = random.nextInt(100); + doc.add(new SortedNumericDocValuesField(SIZE, val)); + } + if (randomBoolean()) { + val = random.nextInt(10); // Random strings for int between 0 and 9 for keyword terms + doc.add(new SortedSetDocValuesField(KEYWORD_FIELD, new BytesRef(String.valueOf(val)))); + } + if (randomBoolean()) { + date = random.nextInt(180) * 24 * 60 * 60 * 1000L; // Random date within 180 days + doc.add(new SortedNumericDocValuesField(TIMESTAMP_FIELD, date)); + doc.add(new LongPoint(TIMESTAMP_FIELD, date)); + } + + iw.addDocument(doc); + docs.add(doc); + } + + if (randomBoolean()) { + iw.forceMerge(1); + } + iw.close(); + DirectoryReader ir = DirectoryReader.open(directory); + LeafReaderContext context = ir.leaves().get(0); + + SegmentReader reader = Lucene.segmentReader(context.reader()); + IndexSearcher indexSearcher = newSearcher(wrapInMockESDirectoryReader(ir), false, false); + CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader(); + + List compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields(); + CompositeIndexFieldInfo starTree = compositeIndexFields.get(0); + + LinkedHashMap supportedDimensions = new LinkedHashMap<>(); + supportedDimensions.put(new NumericDimension(STATUS), STATUS_FIELD_TYPE); + supportedDimensions.put(new NumericDimension(SIZE), SIZE_FIELD_TYPE); + supportedDimensions.put( + new DateDimension( + TIMESTAMP_FIELD, + List.of( + new DateTimeUnitAdapter(Rounding.DateTimeUnit.MONTH_OF_YEAR), + new DateTimeUnitAdapter(Rounding.DateTimeUnit.DAY_OF_MONTH) + ), + DateFieldMapper.Resolution.MILLISECONDS + ), + new DateFieldMapper.DateFieldType(TIMESTAMP_FIELD) + ); + supportedDimensions.put(new OrdinalDimension(KEYWORD_FIELD), KEYWORD_FIELD_TYPE); + + Query query = new MatchAllDocsQuery(); + QueryBuilder queryBuilder = null; + + ValuesSourceAggregationBuilder[] aggBuilders = { + sum("_sum").field(STATUS), + max("_max").field(STATUS), + min("_min").field(STATUS), + count("_count").field(STATUS) }; + + List>> aggregationSuppliers = List.of( + () -> terms("term_size").field(SIZE), + () -> terms("term_status").field(STATUS), + () -> range("range_agg").field(STATUS).addRange(10, 30).addRange(30, 50), + () -> terms("term_keyword").field(KEYWORD_FIELD).collectMode(Aggregator.SubAggCollectionMode.BREADTH_FIRST), + () -> dateHistogram("by_day").field(TIMESTAMP_FIELD).calendarInterval(DateHistogramInterval.DAY) + ); + // 3-LEVELS [BUCKET -> BUCKET -> METRIC] + for (ValuesSourceAggregationBuilder aggregationBuilder : aggBuilders) { + query = new MatchAllDocsQuery(); + queryBuilder = null; + for (Supplier> outerSupplier : aggregationSuppliers) { + for (Supplier> innerSupplier : aggregationSuppliers) { + if (innerSupplier == outerSupplier) { + continue; + } + + ValuesSourceAggregationBuilder inner = innerSupplier.get().subAggregation(aggregationBuilder); + ValuesSourceAggregationBuilder outer = outerSupplier.get().subAggregation(inner); + + // Skipping [DateHistogramAggregationBuilder + RangeAggregationBuilder] combinations for a failing assertion in + // searchAndReduceStarTree + boolean skipFailingAssertion = (inner instanceof RangeAggregationBuilder + && outer instanceof DateHistogramAggregationBuilder); + testCase(indexSearcher, query, queryBuilder, outer, starTree, supportedDimensions, skipFailingAssertion); + + // Numeric-terms query with numeric terms aggregation + for (int cases = 0; cases < 10; cases++) { + String queryField; + long queryValue; + if (randomBoolean()) { + queryField = STATUS; + } else { + queryField = SIZE; + } + queryValue = random.nextInt(10); + query = SortedNumericDocValuesField.newSlowExactQuery(queryField, queryValue); + queryBuilder = new TermQueryBuilder(queryField, queryValue); + testCase(indexSearcher, query, queryBuilder, outer, starTree, supportedDimensions, skipFailingAssertion); + } + } + } + } + + // // 4-LEVELS [BUCKET -> BUCKET -> BUCKET -> METRIC] + for (ValuesSourceAggregationBuilder aggregationBuilder : aggBuilders) { + query = new MatchAllDocsQuery(); + queryBuilder = null; + for (Supplier> outermostSupplier : aggregationSuppliers) { + for (Supplier> middleSupplier : aggregationSuppliers) { + for (Supplier> innerSupplier : aggregationSuppliers) { + + ValuesSourceAggregationBuilder innermost = innerSupplier.get().subAggregation(aggregationBuilder); + ValuesSourceAggregationBuilder middle = middleSupplier.get().subAggregation(innermost); + ValuesSourceAggregationBuilder outermost = outermostSupplier.get().subAggregation(middle); + + // Skipping [DateHistogramAggregationBuilder + RangeAggregationBuilder] combinations for a failing assertion in + // searchAndReduceStarTree + boolean skipFailingAssertion = (middle instanceof RangeAggregationBuilder + && outermost instanceof DateHistogramAggregationBuilder) + || (middle instanceof DateHistogramAggregationBuilder && innermost instanceof RangeAggregationBuilder); + testCase(indexSearcher, query, queryBuilder, outermost, starTree, supportedDimensions, skipFailingAssertion); + + // Numeric-terms query with numeric terms aggregation + for (int cases = 0; cases < 10; cases++) { + String queryField; + long queryValue; + if (randomBoolean()) { + queryField = STATUS; + } else { + queryField = SIZE; + } + queryValue = random.nextInt(10); + query = SortedNumericDocValuesField.newSlowExactQuery(queryField, queryValue); + queryBuilder = new TermQueryBuilder(queryField, queryValue); + testCase(indexSearcher, query, queryBuilder, outermost, starTree, supportedDimensions, skipFailingAssertion); + } + + } + } + } + } + + ir.close(); + directory.close(); + + } + + private void testCase( + IndexSearcher indexSearcher, + Query query, + QueryBuilder queryBuilder, + ValuesSourceAggregationBuilder aggregationBuilder, + CompositeIndexFieldInfo starTree, + LinkedHashMap supportedDimensions, + boolean skipFailingAssertion + ) throws IOException { + + if (aggregationBuilder instanceof TermsAggregationBuilder) { + assertEqualStarTreeAggregation( + InternalTerms.class, + InternalTerms::getBuckets, + aggregationBuilder, + indexSearcher, + query, + queryBuilder, + starTree, + supportedDimensions, + skipFailingAssertion, + STATUS_FIELD_TYPE, + SIZE_FIELD_TYPE, + TIMESTAMP_FIELD_TYPE, + KEYWORD_FIELD_TYPE + ); + + } else if (aggregationBuilder instanceof DateHistogramAggregationBuilder) { + assertEqualStarTreeAggregation( + InternalDateHistogram.class, + InternalDateHistogram::getBuckets, + aggregationBuilder, + indexSearcher, + query, + queryBuilder, + starTree, + supportedDimensions, + skipFailingAssertion, + STATUS_FIELD_TYPE, + TIMESTAMP_FIELD_TYPE, + SIZE_FIELD_TYPE, + KEYWORD_FIELD_TYPE + ); + + } else if (aggregationBuilder instanceof RangeAggregationBuilder) { + assertEqualStarTreeAggregation( + InternalRange.class, + InternalRange::getBuckets, + aggregationBuilder, + indexSearcher, + query, + queryBuilder, + starTree, + supportedDimensions, + skipFailingAssertion, + STATUS_FIELD_TYPE, + SIZE_FIELD_TYPE, + TIMESTAMP_FIELD_TYPE, + KEYWORD_FIELD_TYPE + ); + } + } + + private void assertEqualStarTreeAggregation( + Class clazz, + Function> getBuckets, + ValuesSourceAggregationBuilder aggregationBuilder, + IndexSearcher indexSearcher, + Query query, + QueryBuilder queryBuilder, + CompositeIndexFieldInfo starTree, + LinkedHashMap supportedDimensions, + boolean skipFailingAssertion, + MappedFieldType... fieldTypes + ) throws IOException { + + T defaultAgg = clazz.cast( + searchAndReduceStarTree( + createIndexSettings(), + indexSearcher, + query, + queryBuilder, + aggregationBuilder, + null, + null, + null, + DEFAULT_MAX_BUCKETS, + false, + null, + false, + skipFailingAssertion, + fieldTypes + ) + ); + + T starTreeAgg = clazz.cast( + searchAndReduceStarTree( + createIndexSettings(), + indexSearcher, + query, + queryBuilder, + aggregationBuilder, + starTree, + supportedDimensions, + null, + DEFAULT_MAX_BUCKETS, + false, + null, + true, + skipFailingAssertion, + fieldTypes + ) + ); + + List defaultBuckets = getBuckets.apply(defaultAgg); + List starTreeBuckets = getBuckets.apply(starTreeAgg); + + assertEquals(defaultBuckets.size(), starTreeBuckets.size()); + assertEquals(defaultBuckets, starTreeBuckets); + } + +} diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java index df982d4f0c7f3..d105fc1c53006 100644 --- a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java @@ -775,6 +775,26 @@ protected A searchAndReduc AggregatorFactory aggregatorFactory, boolean assertCollectorEarlyTermination, MappedFieldType... fieldTypes + ) throws IOException { + return searchAndReduceStarTree(indexSettings, searcher, query, queryBuilder, builder, compositeIndexFieldInfo, supportedDimensions, + supportedMetrics, maxBucket, hasNested, aggregatorFactory, assertCollectorEarlyTermination, false, fieldTypes); + } + + protected A searchAndReduceStarTree( + IndexSettings indexSettings, + IndexSearcher searcher, + Query query, + QueryBuilder queryBuilder, + AggregationBuilder builder, + CompositeIndexFieldInfo compositeIndexFieldInfo, + LinkedHashMap supportedDimensions, + List supportedMetrics, + int maxBucket, + boolean hasNested, + AggregatorFactory aggregatorFactory, + boolean assertCollectorEarlyTermination, + boolean skipFailingAssertion, + MappedFieldType... fieldTypes ) throws IOException { query = query.rewrite(searcher); final IndexReaderContext ctx = searcher.getTopReaderContext(); @@ -823,7 +843,7 @@ protected A searchAndReduc @SuppressWarnings("unchecked") A internalAgg = (A) aggs.get(0).reduce(aggs, context); - doAssertReducedMultiBucketConsumer(internalAgg, reduceBucketConsumer); + if(!skipFailingAssertion) doAssertReducedMultiBucketConsumer(internalAgg, reduceBucketConsumer); return internalAgg; } From a58027f3fc1065f395cdcda90763dc7813856a57 Mon Sep 17 00:00:00 2001 From: Shailesh Singh Date: Mon, 28 Apr 2025 11:56:52 +0530 Subject: [PATCH 2/2] added feature flag and address comments --- .../search/startree/StarTreeQueryContext.java | 8 +++-- .../StarTreeNestedAggregatorTests.java | 33 +++++++++---------- .../aggregations/AggregatorTestCase.java | 4 +-- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java b/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java index 9e6315f3d5a13..9128087a6fb4a 100644 --- a/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java +++ b/server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java @@ -10,6 +10,7 @@ import org.apache.lucene.util.FixedBitSet; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; import org.opensearch.index.compositeindex.datacube.DateDimension; import org.opensearch.index.compositeindex.datacube.Dimension; @@ -250,12 +251,13 @@ private static boolean validateNestedAggregationStructure( AggregatorFactory aggregatorFactory ) { boolean isValid; + boolean isFeatureFlagEnabled = FeatureFlags.isEnabled(FeatureFlags.STAR_TREE_INDEX_SETTING); switch (aggregatorFactory) { case TermsAggregatorFactory termsAggregatorFactory -> isValid = validateKeywordTermsAggregationSupport( compositeIndexFieldInfo, termsAggregatorFactory - ); + ) && isFeatureFlagEnabled; case DateHistogramAggregatorFactory dateHistogramAggregatorFactory -> isValid = validateDateHistogramSupport( compositeIndexFieldInfo, dateHistogramAggregatorFactory @@ -263,7 +265,7 @@ private static boolean validateNestedAggregationStructure( case RangeAggregatorFactory rangeAggregatorFactory -> isValid = validateRangeAggregationSupport( compositeIndexFieldInfo, rangeAggregatorFactory - ); + ) && isFeatureFlagEnabled; case MetricAggregatorFactory metricAggregatorFactory -> { isValid = validateStarTreeMetricSupport(compositeIndexFieldInfo, metricAggregatorFactory); return isValid && metricAggregatorFactory.getSubFactories().getFactories().length == 0; @@ -273,7 +275,7 @@ private static boolean validateNestedAggregationStructure( } } - if (!isValid) return false; + if (isValid == false) return false; for (AggregatorFactory subFactory : aggregatorFactory.getSubFactories().getFactories()) { if (!validateNestedAggregationStructure(compositeIndexFieldInfo, subFactory)) { diff --git a/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeNestedAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeNestedAggregatorTests.java index e73778c32e166..efc19caf839e1 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeNestedAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeNestedAggregatorTests.java @@ -208,18 +208,15 @@ public void testStarTreeNestedAggregations() throws IOException { queryBuilder = null; for (Supplier> outerSupplier : aggregationSuppliers) { for (Supplier> innerSupplier : aggregationSuppliers) { - if (innerSupplier == outerSupplier) { - continue; - } ValuesSourceAggregationBuilder inner = innerSupplier.get().subAggregation(aggregationBuilder); ValuesSourceAggregationBuilder outer = outerSupplier.get().subAggregation(inner); - // Skipping [DateHistogramAggregationBuilder + RangeAggregationBuilder] combinations for a failing assertion in + // Skipping [DateHistogramAggregationBuilder + RangeAggregationBuilder] combinations for a ReducedMultiBucketConsumer assertion in // searchAndReduceStarTree - boolean skipFailingAssertion = (inner instanceof RangeAggregationBuilder + boolean skipReducedMultiBucketConsumerAssertion = (inner instanceof RangeAggregationBuilder && outer instanceof DateHistogramAggregationBuilder); - testCase(indexSearcher, query, queryBuilder, outer, starTree, supportedDimensions, skipFailingAssertion); + testCase(indexSearcher, query, queryBuilder, outer, starTree, supportedDimensions, skipReducedMultiBucketConsumerAssertion); // Numeric-terms query with numeric terms aggregation for (int cases = 0; cases < 10; cases++) { @@ -233,7 +230,7 @@ public void testStarTreeNestedAggregations() throws IOException { queryValue = random.nextInt(10); query = SortedNumericDocValuesField.newSlowExactQuery(queryField, queryValue); queryBuilder = new TermQueryBuilder(queryField, queryValue); - testCase(indexSearcher, query, queryBuilder, outer, starTree, supportedDimensions, skipFailingAssertion); + testCase(indexSearcher, query, queryBuilder, outer, starTree, supportedDimensions, skipReducedMultiBucketConsumerAssertion); } } } @@ -251,12 +248,12 @@ public void testStarTreeNestedAggregations() throws IOException { ValuesSourceAggregationBuilder middle = middleSupplier.get().subAggregation(innermost); ValuesSourceAggregationBuilder outermost = outermostSupplier.get().subAggregation(middle); - // Skipping [DateHistogramAggregationBuilder + RangeAggregationBuilder] combinations for a failing assertion in + // Skipping [DateHistogramAggregationBuilder + RangeAggregationBuilder] combinations for a ReducedMultiBucketConsumer assertion in // searchAndReduceStarTree - boolean skipFailingAssertion = (middle instanceof RangeAggregationBuilder + boolean skipReducedMultiBucketConsumerAssertion = (middle instanceof RangeAggregationBuilder && outermost instanceof DateHistogramAggregationBuilder) || (middle instanceof DateHistogramAggregationBuilder && innermost instanceof RangeAggregationBuilder); - testCase(indexSearcher, query, queryBuilder, outermost, starTree, supportedDimensions, skipFailingAssertion); + testCase(indexSearcher, query, queryBuilder, outermost, starTree, supportedDimensions, skipReducedMultiBucketConsumerAssertion); // Numeric-terms query with numeric terms aggregation for (int cases = 0; cases < 10; cases++) { @@ -270,7 +267,7 @@ public void testStarTreeNestedAggregations() throws IOException { queryValue = random.nextInt(10); query = SortedNumericDocValuesField.newSlowExactQuery(queryField, queryValue); queryBuilder = new TermQueryBuilder(queryField, queryValue); - testCase(indexSearcher, query, queryBuilder, outermost, starTree, supportedDimensions, skipFailingAssertion); + testCase(indexSearcher, query, queryBuilder, outermost, starTree, supportedDimensions, skipReducedMultiBucketConsumerAssertion); } } @@ -290,7 +287,7 @@ private void testCase( ValuesSourceAggregationBuilder aggregationBuilder, CompositeIndexFieldInfo starTree, LinkedHashMap supportedDimensions, - boolean skipFailingAssertion + boolean skipReducedMultiBucketConsumerAssertion ) throws IOException { if (aggregationBuilder instanceof TermsAggregationBuilder) { @@ -303,7 +300,7 @@ private void testCase( queryBuilder, starTree, supportedDimensions, - skipFailingAssertion, + skipReducedMultiBucketConsumerAssertion, STATUS_FIELD_TYPE, SIZE_FIELD_TYPE, TIMESTAMP_FIELD_TYPE, @@ -320,7 +317,7 @@ private void testCase( queryBuilder, starTree, supportedDimensions, - skipFailingAssertion, + skipReducedMultiBucketConsumerAssertion, STATUS_FIELD_TYPE, TIMESTAMP_FIELD_TYPE, SIZE_FIELD_TYPE, @@ -337,7 +334,7 @@ private void testCase( queryBuilder, starTree, supportedDimensions, - skipFailingAssertion, + skipReducedMultiBucketConsumerAssertion, STATUS_FIELD_TYPE, SIZE_FIELD_TYPE, TIMESTAMP_FIELD_TYPE, @@ -355,7 +352,7 @@ private void assertEqualStarTreeAggregation( QueryBuilder queryBuilder, CompositeIndexFieldInfo starTree, LinkedHashMap supportedDimensions, - boolean skipFailingAssertion, + boolean skipReducedMultiBucketConsumerAssertion, MappedFieldType... fieldTypes ) throws IOException { @@ -373,7 +370,7 @@ private void assertEqualStarTreeAggregation( false, null, false, - skipFailingAssertion, + skipReducedMultiBucketConsumerAssertion, fieldTypes ) ); @@ -392,7 +389,7 @@ private void assertEqualStarTreeAggregation( false, null, true, - skipFailingAssertion, + skipReducedMultiBucketConsumerAssertion, fieldTypes ) ); diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java index d105fc1c53006..3830b0acb989a 100644 --- a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java @@ -793,7 +793,7 @@ protected A searchAndReduc boolean hasNested, AggregatorFactory aggregatorFactory, boolean assertCollectorEarlyTermination, - boolean skipFailingAssertion, + boolean skipReducedMultiBucketConsumerAssertion, MappedFieldType... fieldTypes ) throws IOException { query = query.rewrite(searcher); @@ -843,7 +843,7 @@ protected A searchAndReduc @SuppressWarnings("unchecked") A internalAgg = (A) aggs.get(0).reduce(aggs, context); - if(!skipFailingAssertion) doAssertReducedMultiBucketConsumer(internalAgg, reduceBucketConsumer); + if(!skipReducedMultiBucketConsumerAssertion) doAssertReducedMultiBucketConsumer(internalAgg, reduceBucketConsumer); return internalAgg; }