Skip to content

Commit 8531924

Browse files
authored
Add query rewriting infrastructure to reduce query complexity (opensearch-project#19060)
* Add query rewriting infrastructure to reduce query complexity Implements three query optimizations that work together: - Boolean flattening: removes unnecessary nested boolean queries - Terms merging: combines multiple term queries on same field in filter/should contexts - Match-all removal: eliminates redundant match_all queries Key features: - 60-70% reduction in query nodes for typical filtered queries - Feature flag: search.query_rewriting.enabled (default: true) - Preserves exact query semantics and results Signed-off-by: Atri Sharma <atri.jiit@gmail.com> * Fix forbidden api issues Signed-off-by: Atri Sharma <atri.jiit@gmail.com> * Update writers and get tests to pass Signed-off-by: Atri Sharma <atri.jiit@gmail.com> * Update per CI Signed-off-by: Atri Sharma <atri.jiit@gmail.com> * Fix term merging threshold and update comments Signed-off-by: Atri Sharma <atri.jiit@gmail.com> * Expose setting and update per comments Signed-off-by: Atri Sharma <atri.jiit@gmail.com> * Update CHANGELOG Signed-off-by: Atri Sharma <atri.jiit@gmail.com> * Fix tests and ensure scoring MATCH ALL query is preserved Signed-off-by: Atri Sharma <atri.jiit@gmail.com> * Migrate must to filter and must not to should optimizations to query rewriting infrastructure This commit migrates two existing query optimizations from BoolQueryBuilder to the new query rewriting infrastructure: 1. **MustToFilterRewriter**: Moves non scoring queries (range, geo, numeric term/terms/match) from must to filter clauses to avoid unnecessary scoring calculations (from PR opensearch-project#18541) 2. **MustNotToShouldRewriter**: Transforms negative queries into positive complements for better performance on single valued numeric fields (from PRs opensearch-project#17655 and opensearch-project#18498) Changes: Add MustToFilterRewriter with priority 150 (runs after boolean flattening) Add MustNotToShouldRewriter with priority 175 (runs after must to filter) Register both rewriters in QueryRewriterRegistry Add comprehensive test suites (15 tests for must to filter, 14 for must not to should) Disable legacy implementations in BoolQueryBuilder Comment out BoolQueryBuilder tests that relied on the old implementations The new rewriters maintain full backward compatibility while providing: Better separation of concerns Recursive rewriting for nested boolean queries Proper error handling and logging Consistent priority based execution order Signed-off-by: Atri Sharma <atri.jiit@gmail.com> * Handle fields with missing fields Signed-off-by: Atri Sharma <atri.jiit@gmail.com> --------- Signed-off-by: Atri Sharma <atri.jiit@gmail.com>
1 parent 816e711 commit 8531924

19 files changed

+3012
-208
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
77
### Added
88
- Expand fetch phase profiling to support inner hits and top hits aggregation phases ([##18936](https://github.com/opensearch-project/OpenSearch/pull/18936))
99
- Add temporal routing processors for time-based document routing ([#18920](https://github.com/opensearch-project/OpenSearch/issues/18920))
10+
- Implement Query Rewriting Infrastructure ([#19060](https://github.com/opensearch-project/OpenSearch/pull/19060))
1011
- The dynamic mapping parameter supports false_allow_templates ([#19065](https://github.com/opensearch-project/OpenSearch/pull/19065))
1112
- Add a toBuilder method in EngineConfig to support easy modification of configs([#19054](https://github.com/opensearch-project/OpenSearch/pull/19054))
1213
- Add StoreFactory plugin interface for custom Store implementations([#19091](https://github.com/opensearch-project/OpenSearch/pull/19091))

server/src/main/java/org/opensearch/common/settings/ClusterSettings.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,8 @@ public void apply(Settings value, Settings current, Settings previous) {
804804
BlobStoreRepository.SNAPSHOT_REPOSITORY_DATA_CACHE_THRESHOLD,
805805

806806
SearchService.CLUSTER_ALLOW_DERIVED_FIELD_SETTING,
807+
SearchService.QUERY_REWRITING_ENABLED_SETTING,
808+
SearchService.QUERY_REWRITING_TERMS_THRESHOLD_SETTING,
807809

808810
// Composite index settings
809811
CompositeIndexSettings.STAR_TREE_INDEX_ENABLED_SETTING,

server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java

Lines changed: 0 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,6 @@
4949
import org.opensearch.core.xcontent.ObjectParser;
5050
import org.opensearch.core.xcontent.XContentBuilder;
5151
import org.opensearch.core.xcontent.XContentParser;
52-
import org.opensearch.index.mapper.MappedFieldType;
53-
import org.opensearch.index.mapper.NumberFieldMapper;
5452

5553
import java.io.IOException;
5654
import java.util.ArrayList;
@@ -402,9 +400,6 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws
402400
return any.get();
403401
}
404402

405-
changed |= rewriteMustNotRangeClausesToShould(newBuilder, queryRewriteContext);
406-
changed |= rewriteMustClausesToFilter(newBuilder, queryRewriteContext);
407-
408403
if (changed) {
409404
newBuilder.adjustPureNegative = adjustPureNegative;
410405
if (minimumShouldMatch != null) {
@@ -559,53 +554,4 @@ private boolean checkAllDocsHaveOneValue(List<LeafReaderContext> contexts, Strin
559554
}
560555
return true;
561556
}
562-
563-
private boolean rewriteMustClausesToFilter(BoolQueryBuilder newBuilder, QueryRewriteContext queryRewriteContext) {
564-
// If we have must clauses which return the same score for all matching documents, like numeric term queries or ranges,
565-
// moving them from must clauses to filter clauses improves performance in some cases.
566-
// This works because it can let Lucene use MaxScoreCache to skip non-competitive docs.
567-
boolean changed = false;
568-
Set<QueryBuilder> mustClausesToMove = new HashSet<>();
569-
570-
QueryShardContext shardContext;
571-
if (queryRewriteContext == null) {
572-
shardContext = null;
573-
} else {
574-
shardContext = queryRewriteContext.convertToShardContext(); // can still be null
575-
}
576-
577-
for (QueryBuilder clause : mustClauses) {
578-
if (isClauseIrrelevantToScoring(clause, shardContext)) {
579-
mustClausesToMove.add(clause);
580-
changed = true;
581-
}
582-
}
583-
584-
newBuilder.mustClauses.removeAll(mustClausesToMove);
585-
newBuilder.filterClauses.addAll(mustClausesToMove);
586-
return changed;
587-
}
588-
589-
private boolean isClauseIrrelevantToScoring(QueryBuilder clause, QueryShardContext context) {
590-
// This is an incomplete list of clauses this might apply for; it can be expanded in future.
591-
592-
// If a clause is purely numeric, for example a date range, its score is unimportant as
593-
// it'll be the same for all returned docs
594-
if (clause instanceof RangeQueryBuilder) return true;
595-
if (clause instanceof GeoBoundingBoxQueryBuilder) return true;
596-
597-
// Further optimizations depend on knowing whether the field is numeric.
598-
// QueryBuilder.doRewrite() is called several times in the search flow, and the shard context telling us this
599-
// is only available the last time, when it's called from SearchService.executeQueryPhase().
600-
// Skip moving these clauses if we don't have the shard context.
601-
if (context == null) return false;
602-
if (!(clause instanceof WithFieldName wfn)) return false;
603-
MappedFieldType fieldType = context.fieldMapper(wfn.fieldName());
604-
if (!(fieldType instanceof NumberFieldMapper.NumberFieldType)) return false;
605-
606-
if (clause instanceof MatchQueryBuilder) return true;
607-
if (clause instanceof TermQueryBuilder) return true;
608-
if (clause instanceof TermsQueryBuilder) return true;
609-
return false;
610-
}
611557
}

server/src/main/java/org/opensearch/search/SearchService.java

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@
136136
import org.opensearch.search.profile.Profilers;
137137
import org.opensearch.search.profile.SearchProfileShardResults;
138138
import org.opensearch.search.query.QueryPhase;
139+
import org.opensearch.search.query.QueryRewriterRegistry;
139140
import org.opensearch.search.query.QuerySearchRequest;
140141
import org.opensearch.search.query.QuerySearchResult;
141142
import org.opensearch.search.query.ScrollQuerySearchResult;
@@ -276,6 +277,27 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv
276277
Property.Deprecated
277278
);
278279

280+
public static final Setting<Boolean> QUERY_REWRITING_ENABLED_SETTING = Setting.boolSetting(
281+
"search.query_rewriting.enabled",
282+
true,
283+
Property.Dynamic,
284+
Property.NodeScope
285+
);
286+
287+
/**
288+
* Controls the threshold for the number of term queries on the same field that triggers
289+
* the TermsMergingRewriter to combine them into a single terms query. For example,
290+
* if set to 16 (default), when 16 or more term queries target the same field within
291+
* a boolean clause, they will be merged into a single terms query for better performance.
292+
*/
293+
public static final Setting<Integer> QUERY_REWRITING_TERMS_THRESHOLD_SETTING = Setting.intSetting(
294+
"search.query_rewriting.terms_threshold",
295+
16,
296+
2, // minimum value
297+
Property.Dynamic,
298+
Property.NodeScope
299+
);
300+
279301
// Allow concurrent segment search for all requests
280302
public static final String CONCURRENT_SEGMENT_SEARCH_MODE_ALL = "all";
281303

@@ -507,6 +529,10 @@ public SearchService(
507529
this.concurrentSearchDeciderFactories = concurrentSearchDeciderFactories;
508530

509531
this.pluginProfilers = pluginProfilers;
532+
533+
// Initialize QueryRewriterRegistry with cluster settings so TermsMergingRewriter
534+
// can register its settings update consumer
535+
QueryRewriterRegistry.INSTANCE.initialize(settings, clusterService.getClusterSettings());
510536
}
511537

512538
private void validateKeepAlives(TimeValue defaultKeepAlive, TimeValue maxKeepAlive) {
@@ -1488,8 +1514,13 @@ private void parseSource(DefaultSearchContext context, SearchSourceBuilder sourc
14881514
context.size(source.size());
14891515
Map<String, InnerHitContextBuilder> innerHitBuilders = new HashMap<>();
14901516
if (source.query() != null) {
1491-
InnerHitContextBuilder.extractInnerHits(source.query(), innerHitBuilders);
1492-
context.parsedQuery(queryShardContext.toQuery(source.query()));
1517+
QueryBuilder query = source.query();
1518+
1519+
// Apply query rewriting optimizations
1520+
query = QueryRewriterRegistry.INSTANCE.rewrite(query, queryShardContext);
1521+
1522+
InnerHitContextBuilder.extractInnerHits(query, innerHitBuilders);
1523+
context.parsedQuery(queryShardContext.toQuery(query));
14931524
}
14941525
if (source.postFilter() != null) {
14951526
InnerHitContextBuilder.extractInnerHits(source.postFilter(), innerHitBuilders);
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.search.query;
10+
11+
import org.opensearch.common.annotation.ExperimentalApi;
12+
import org.opensearch.index.query.QueryBuilder;
13+
import org.opensearch.index.query.QueryShardContext;
14+
15+
/**
16+
* Interface for query rewriting implementations that optimize query structure
17+
* before conversion to Lucene queries.
18+
*
19+
* @opensearch.experimental
20+
*/
21+
@ExperimentalApi
22+
public interface QueryRewriter {
23+
24+
/**
25+
* Rewrites the given query builder to a more optimal form.
26+
*
27+
* @param query The query to rewrite
28+
* @param context The search execution context
29+
* @return The rewritten query (may be the same instance if no rewrite needed)
30+
*/
31+
QueryBuilder rewrite(QueryBuilder query, QueryShardContext context);
32+
33+
/**
34+
* Returns the priority of this rewriter. Lower values execute first.
35+
* This allows control over rewrite ordering when multiple rewriters
36+
* may interact.
37+
*
38+
* @return The priority value
39+
*/
40+
default int priority() {
41+
return 1000;
42+
}
43+
44+
/**
45+
* Returns the name of this rewriter for debugging and profiling.
46+
*
47+
* @return The rewriter name
48+
*/
49+
String name();
50+
}
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.search.query;
10+
11+
import org.apache.logging.log4j.LogManager;
12+
import org.apache.logging.log4j.Logger;
13+
import org.opensearch.common.settings.ClusterSettings;
14+
import org.opensearch.common.settings.Settings;
15+
import org.opensearch.index.query.QueryBuilder;
16+
import org.opensearch.index.query.QueryShardContext;
17+
import org.opensearch.search.SearchService;
18+
import org.opensearch.search.query.rewriters.BooleanFlatteningRewriter;
19+
import org.opensearch.search.query.rewriters.MatchAllRemovalRewriter;
20+
import org.opensearch.search.query.rewriters.MustNotToShouldRewriter;
21+
import org.opensearch.search.query.rewriters.MustToFilterRewriter;
22+
import org.opensearch.search.query.rewriters.TermsMergingRewriter;
23+
24+
import java.util.ArrayList;
25+
import java.util.Comparator;
26+
import java.util.List;
27+
import java.util.concurrent.CopyOnWriteArrayList;
28+
29+
/**
30+
* Registry for query rewriters
31+
*
32+
* @opensearch.internal
33+
*/
34+
public final class QueryRewriterRegistry {
35+
36+
private static final Logger logger = LogManager.getLogger(QueryRewriterRegistry.class);
37+
38+
public static final QueryRewriterRegistry INSTANCE = new QueryRewriterRegistry();
39+
40+
/**
41+
* Default rewriters.
42+
* CopyOnWriteArrayList is used for thread-safety during registration.
43+
*/
44+
private final CopyOnWriteArrayList<QueryRewriter> rewriters;
45+
46+
/**
47+
* Whether query rewriting is enabled.
48+
*/
49+
private volatile boolean enabled;
50+
51+
private QueryRewriterRegistry() {
52+
this.rewriters = new CopyOnWriteArrayList<>();
53+
54+
// Register default rewriters using singletons
55+
registerRewriter(BooleanFlatteningRewriter.INSTANCE);
56+
registerRewriter(MustToFilterRewriter.INSTANCE);
57+
registerRewriter(MustNotToShouldRewriter.INSTANCE);
58+
registerRewriter(MatchAllRemovalRewriter.INSTANCE);
59+
registerRewriter(TermsMergingRewriter.INSTANCE);
60+
}
61+
62+
/**
63+
* Register a custom query rewriter.
64+
*
65+
* @param rewriter The rewriter to register
66+
*/
67+
public void registerRewriter(QueryRewriter rewriter) {
68+
if (rewriter != null) {
69+
rewriters.add(rewriter);
70+
logger.info("Registered query rewriter: {}", rewriter.name());
71+
}
72+
}
73+
74+
/**
75+
* Initialize the registry with cluster settings.
76+
* This must be called once during system startup to properly configure
77+
* the TermsMergingRewriter with settings and update consumers.
78+
*
79+
* @param settings Initial cluster settings
80+
* @param clusterSettings Cluster settings for registering update consumers
81+
*/
82+
public void initialize(Settings settings, ClusterSettings clusterSettings) {
83+
TermsMergingRewriter.INSTANCE.initialize(settings, clusterSettings);
84+
this.enabled = SearchService.QUERY_REWRITING_ENABLED_SETTING.get(settings);
85+
clusterSettings.addSettingsUpdateConsumer(
86+
SearchService.QUERY_REWRITING_ENABLED_SETTING,
87+
(Boolean enabled) -> this.enabled = enabled
88+
);
89+
}
90+
91+
public QueryBuilder rewrite(QueryBuilder query, QueryShardContext context) {
92+
if (!enabled || query == null) {
93+
return query;
94+
}
95+
96+
List<QueryRewriter> sortedRewriters = new ArrayList<>(rewriters);
97+
sortedRewriters.sort(Comparator.comparingInt(QueryRewriter::priority));
98+
99+
QueryBuilder current = query;
100+
for (QueryRewriter rewriter : sortedRewriters) {
101+
try {
102+
QueryBuilder rewritten = rewriter.rewrite(current, context);
103+
if (rewritten != current) {
104+
current = rewritten;
105+
}
106+
} catch (Exception e) {
107+
logger.warn("Query rewriter {} failed: {}", rewriter.name(), e.getMessage());
108+
}
109+
}
110+
111+
return current;
112+
}
113+
}

0 commit comments

Comments
 (0)