-
Notifications
You must be signed in to change notification settings - Fork 25.3k
Fix Semantic Query Rewrite Interception Drops Boosts #129282
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
ccd64ae
9338cd5
370931d
b85abda
5db2686
2ce691e
4100200
3406ae1
d07952a
f133632
a9048f0
5a1dab9
6cef441
faa35ea
675fb22
13e791e
3a5a30f
016e448
70b228e
d5e7caa
efcf9c4
00eb6ad
f449299
6db0abf
daf2cb4
b88b077
9e725cb
651ee2b
a356b44
71eac8d
d71bf2c
675463c
201d27c
daf2f6e
2521b48
61f9445
f4cadaa
08909de
37bfc43
fa5cfe7
0640631
404efcf
f73285d
96f5aa6
3065e5b
828d8c2
6a2e0a5
bde54df
d08dbdd
fa955c3
916b1cc
d9ef867
cde55d1
8ddda3c
873efdb
44b8aa9
104f16b
2a96d52
5dcfc1b
469f598
375ae36
c81f184
394f43a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
pr: 129282 | ||
summary: "Fix query rewrite logic to preserve `boosts` and `queryName` for `match`,\ | ||
\ `knn`, and `sparse_vector` queries on semantic_text fields" | ||
area: Search | ||
type: bug | ||
issues: [] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,7 +36,10 @@ protected String getQuery(QueryBuilder queryBuilder) { | |
|
||
@Override | ||
protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) { | ||
return new SemanticQueryBuilder(indexInformation.fieldName(), getQuery(queryBuilder), false); | ||
SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder(indexInformation.fieldName(), getQuery(queryBuilder), false); | ||
semanticQueryBuilder.boost(queryBuilder.boost()); | ||
semanticQueryBuilder.queryName(queryBuilder.queryName()); | ||
return semanticQueryBuilder; | ||
} | ||
|
||
@Override | ||
|
@@ -45,7 +48,10 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( | |
InferenceIndexInformationForField indexInformation | ||
) { | ||
assert (queryBuilder instanceof MatchQueryBuilder); | ||
MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) queryBuilder; | ||
MatchQueryBuilder originalMatchQueryBuilder = (MatchQueryBuilder) queryBuilder; | ||
// Create a copy for non-inference fields without boost and _name | ||
MatchQueryBuilder matchQueryBuilder = copyMatchQueryBuilder(originalMatchQueryBuilder); | ||
|
||
BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); | ||
boolQueryBuilder.should( | ||
createSemanticSubQuery( | ||
|
@@ -55,11 +61,33 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( | |
) | ||
); | ||
boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), matchQueryBuilder)); | ||
boolQueryBuilder.boost(queryBuilder.boost()); | ||
boolQueryBuilder.queryName(queryBuilder.queryName()); | ||
return boolQueryBuilder; | ||
Samiul-TheSoccerFan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
@Override | ||
public String getQueryName() { | ||
return MatchQueryBuilder.NAME; | ||
} | ||
|
||
private MatchQueryBuilder copyMatchQueryBuilder(MatchQueryBuilder queryBuilder) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Non-blocking feedback: This PR has been around for a while, and I'm OK with this as written if we just want to get it in. However, future maintainability would be a concern of mine (what if we add a new param to match, such as prefiltering?). I'd almost suggest creating a new constructor in |
||
MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder(queryBuilder.fieldName(), queryBuilder.value()); | ||
matchQueryBuilder.operator(queryBuilder.operator()); | ||
matchQueryBuilder.prefixLength(queryBuilder.prefixLength()); | ||
matchQueryBuilder.maxExpansions(queryBuilder.maxExpansions()); | ||
matchQueryBuilder.fuzzyTranspositions(queryBuilder.fuzzyTranspositions()); | ||
matchQueryBuilder.lenient(queryBuilder.lenient()); | ||
matchQueryBuilder.zeroTermsQuery(queryBuilder.zeroTermsQuery()); | ||
matchQueryBuilder.analyzer(queryBuilder.analyzer()); | ||
matchQueryBuilder.minimumShouldMatch(queryBuilder.minimumShouldMatch()); | ||
matchQueryBuilder.fuzzyRewrite(queryBuilder.fuzzyRewrite()); | ||
|
||
if (queryBuilder.fuzziness() != null) { | ||
matchQueryBuilder.fuzziness(queryBuilder.fuzziness()); | ||
} | ||
|
||
matchQueryBuilder.autoGenerateSynonymsPhraseQuery(queryBuilder.autoGenerateSynonymsPhraseQuery()); | ||
return matchQueryBuilder; | ||
} | ||
} |
Samiul-TheSoccerFan marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,8 @@ public class SemanticMatchQueryRewriteInterceptorTests extends ESTestCase { | |
|
||
private static final String FIELD_NAME = "fieldName"; | ||
private static final String VALUE = "value"; | ||
private static final String QUERY_NAME = "match_query"; | ||
private static final float BOOST = 5.0f; | ||
|
||
@Before | ||
public void setup() { | ||
|
@@ -79,10 +81,38 @@ public void testMatchQueryOnNonInferenceFieldRemainsMatchQuery() throws IOExcept | |
assertEquals(original, rewritten); | ||
} | ||
|
||
public void testBoostAndQueryNameInMatchQueryRewrite() throws IOException { | ||
Map<String, InferenceFieldMetadata> inferenceFields = Map.of( | ||
FIELD_NAME, | ||
new InferenceFieldMetadata(index.getName(), "inferenceId", new String[] { FIELD_NAME }, null) | ||
); | ||
QueryRewriteContext context = createQueryRewriteContext(inferenceFields); | ||
QueryBuilder original = createTestQueryBuilderWithBoostAndQueryName(); | ||
QueryBuilder rewritten = original.rewrite(context); | ||
assertTrue( | ||
"Expected query to be intercepted, but was [" + rewritten.getClass().getName() + "]", | ||
rewritten instanceof InterceptedQueryBuilderWrapper | ||
); | ||
InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; | ||
assertEquals(BOOST, intercepted.boost(), 0.0f); | ||
assertEquals(QUERY_NAME, intercepted.queryName()); | ||
assertTrue(intercepted.queryBuilder instanceof SemanticQueryBuilder); | ||
SemanticQueryBuilder semanticQueryBuilder = (SemanticQueryBuilder) intercepted.queryBuilder; | ||
assertEquals(FIELD_NAME, semanticQueryBuilder.getFieldName()); | ||
assertEquals(VALUE, semanticQueryBuilder.getQuery()); | ||
} | ||
|
||
private MatchQueryBuilder createTestQueryBuilder() { | ||
return new MatchQueryBuilder(FIELD_NAME, VALUE); | ||
} | ||
|
||
private MatchQueryBuilder createTestQueryBuilderWithBoostAndQueryName() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Non-blocking nitpick: Since this is pretty simple and only used once, it doesn't need to be its own method. Maybe more readable to include the test query builder inside the test itself. |
||
MatchQueryBuilder queryBuilder = createTestQueryBuilder(); | ||
queryBuilder.boost(BOOST); | ||
queryBuilder.queryName(QUERY_NAME); | ||
return queryBuilder; | ||
} | ||
|
||
private QueryRewriteContext createQueryRewriteContext(Map<String, InferenceFieldMetadata> inferenceFields) { | ||
IndexMetadata indexMetadata = IndexMetadata.builder(index.getName()) | ||
.settings( | ||
|
Samiul-TheSoccerFan marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,62 +52,68 @@ public void cleanup() { | |
} | ||
|
||
public void testSparseVectorQueryOnInferenceFieldIsInterceptedAndRewritten() throws IOException { | ||
float boost = randomFloatBetween(1, 10, true); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we do a |
||
String queryName = randomAlphaOfLength(5); | ||
Map<String, InferenceFieldMetadata> inferenceFields = Map.of( | ||
FIELD_NAME, | ||
new InferenceFieldMetadata(index.getName(), "inferenceId", new String[] { FIELD_NAME }, null) | ||
); | ||
QueryRewriteContext context = createQueryRewriteContext(inferenceFields); | ||
QueryBuilder original = new SparseVectorQueryBuilder(FIELD_NAME, INFERENCE_ID, QUERY); | ||
QueryBuilder rewritten = original.rewrite(context); | ||
assertTrue( | ||
"Expected query to be intercepted, but was [" + rewritten.getClass().getName() + "]", | ||
rewritten instanceof InterceptedQueryBuilderWrapper | ||
); | ||
InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; | ||
assertTrue(intercepted.queryBuilder instanceof NestedQueryBuilder); | ||
NestedQueryBuilder nestedQueryBuilder = (NestedQueryBuilder) intercepted.queryBuilder; | ||
assertEquals(SemanticTextField.getChunksFieldName(FIELD_NAME), nestedQueryBuilder.path()); | ||
QueryBuilder innerQuery = nestedQueryBuilder.query(); | ||
assertTrue(innerQuery instanceof SparseVectorQueryBuilder); | ||
SparseVectorQueryBuilder sparseVectorQueryBuilder = (SparseVectorQueryBuilder) innerQuery; | ||
assertEquals(SemanticTextField.getEmbeddingsFieldName(FIELD_NAME), sparseVectorQueryBuilder.getFieldName()); | ||
assertEquals(INFERENCE_ID, sparseVectorQueryBuilder.getInferenceId()); | ||
assertEquals(QUERY, sparseVectorQueryBuilder.getQuery()); | ||
original.boost(boost); | ||
original.queryName(queryName); | ||
testRewrittenInferenceQuery(context, original); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice consolidation 🙌 |
||
} | ||
|
||
public void testSparseVectorQueryOnInferenceFieldWithoutInferenceIdIsInterceptedAndRewritten() throws IOException { | ||
float boost = randomFloatBetween(1, 10, true); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same note on |
||
String queryName = randomAlphaOfLength(5); | ||
Map<String, InferenceFieldMetadata> inferenceFields = Map.of( | ||
FIELD_NAME, | ||
new InferenceFieldMetadata(index.getName(), "inferenceId", new String[] { FIELD_NAME }, null) | ||
); | ||
QueryRewriteContext context = createQueryRewriteContext(inferenceFields); | ||
QueryBuilder original = new SparseVectorQueryBuilder(FIELD_NAME, null, QUERY); | ||
original.boost(boost); | ||
original.queryName(queryName); | ||
testRewrittenInferenceQuery(context, original); | ||
} | ||
|
||
public void testSparseVectorQueryOnNonInferenceFieldRemainsUnchanged() throws IOException { | ||
QueryRewriteContext context = createQueryRewriteContext(Map.of()); // No inference fields | ||
QueryBuilder original = new SparseVectorQueryBuilder(FIELD_NAME, INFERENCE_ID, QUERY); | ||
QueryBuilder rewritten = original.rewrite(context); | ||
assertTrue( | ||
"Expected query to remain sparse_vector but was [" + rewritten.getClass().getName() + "]", | ||
rewritten instanceof SparseVectorQueryBuilder | ||
); | ||
assertEquals(original, rewritten); | ||
} | ||
|
||
private void testRewrittenInferenceQuery(QueryRewriteContext context, QueryBuilder original) throws IOException { | ||
QueryBuilder rewritten = original.rewrite(context); | ||
assertTrue( | ||
"Expected query to be intercepted, but was [" + rewritten.getClass().getName() + "]", | ||
rewritten instanceof InterceptedQueryBuilderWrapper | ||
); | ||
InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; | ||
assertEquals(original.boost(), intercepted.boost(), 0.0f); | ||
assertEquals(original.queryName(), intercepted.queryName()); | ||
|
||
assertTrue(intercepted.queryBuilder instanceof NestedQueryBuilder); | ||
NestedQueryBuilder nestedQueryBuilder = (NestedQueryBuilder) intercepted.queryBuilder; | ||
assertEquals(SemanticTextField.getChunksFieldName(FIELD_NAME), nestedQueryBuilder.path()); | ||
assertEquals(original.boost(), nestedQueryBuilder.boost(), 0.0f); | ||
assertEquals(original.queryName(), nestedQueryBuilder.queryName()); | ||
|
||
QueryBuilder innerQuery = nestedQueryBuilder.query(); | ||
assertTrue(innerQuery instanceof SparseVectorQueryBuilder); | ||
SparseVectorQueryBuilder sparseVectorQueryBuilder = (SparseVectorQueryBuilder) innerQuery; | ||
assertEquals(SemanticTextField.getEmbeddingsFieldName(FIELD_NAME), sparseVectorQueryBuilder.getFieldName()); | ||
assertEquals(INFERENCE_ID, sparseVectorQueryBuilder.getInferenceId()); | ||
assertEquals(QUERY, sparseVectorQueryBuilder.getQuery()); | ||
} | ||
|
||
public void testSparseVectorQueryOnNonInferenceFieldRemainsUnchanged() throws IOException { | ||
QueryRewriteContext context = createQueryRewriteContext(Map.of()); // No inference fields | ||
QueryBuilder original = new SparseVectorQueryBuilder(FIELD_NAME, INFERENCE_ID, QUERY); | ||
QueryBuilder rewritten = original.rewrite(context); | ||
assertTrue( | ||
"Expected query to remain sparse_vector but was [" + rewritten.getClass().getName() + "]", | ||
rewritten instanceof SparseVectorQueryBuilder | ||
); | ||
assertEquals(original, rewritten); | ||
assertEquals(1.0f, sparseVectorQueryBuilder.boost(), 0.0f); | ||
assertNull(sparseVectorQueryBuilder.queryName()); | ||
} | ||
|
||
private QueryRewriteContext createQueryRewriteContext(Map<String, InferenceFieldMetadata> inferenceFields) { | ||
|
Uh oh!
There was an error while loading. Please reload this page.