From ccd64ae6101b40c4c71db100c71bb573fc3f81c0 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 11 Jun 2025 12:36:20 -0400 Subject: [PATCH 01/49] fix boosting for knn --- .../queries/SemanticKnnVectorQueryRewriteInterceptor.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java index 9e513a1ed9226..7fc7c296c9e25 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java @@ -148,6 +148,8 @@ private KnnVectorQueryBuilder addIndexFilterToKnnVectorQuery(Collection ); } + copy.boost(original.boost()); + copy.queryName(original.queryName()); copy.addFilterQueries(original.filterQueries()); copy.addFilterQuery(new TermsQueryBuilder(IndexFieldMapper.NAME, indices)); return copy; @@ -187,6 +189,8 @@ private KnnVectorQueryBuilder buildNewKnnVectorQuery( ); } + newQueryBuilder.boost(original.boost()); + newQueryBuilder.queryName(original.queryName()); newQueryBuilder.addFilterQueries(original.filterQueries()); return newQueryBuilder; } From 9338cd503cd37deea6d0cccc6d9eed5cff0cb4a2 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 11 Jun 2025 13:21:18 -0400 Subject: [PATCH 02/49] Fixing for match query --- .../SemanticMatchQueryRewriteInterceptor.java | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java index 
fd1d65d00faf5..734311afb9090 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java @@ -36,7 +36,14 @@ protected String getQuery(QueryBuilder queryBuilder) { @Override protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) { - return new SemanticQueryBuilder(indexInformation.fieldName(), getQuery(queryBuilder), false); + SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder( + indexInformation.fieldName(), + getQuery(queryBuilder), + false + ); + semanticQueryBuilder.boost(queryBuilder.boost()); + semanticQueryBuilder.queryName(queryBuilder.queryName()); + return semanticQueryBuilder; } @Override @@ -47,13 +54,14 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( assert (queryBuilder instanceof MatchQueryBuilder); MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) queryBuilder; BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - boolQueryBuilder.should( - createSemanticSubQuery( - indexInformation.getInferenceIndices(), - matchQueryBuilder.fieldName(), - (String) matchQueryBuilder.value() - ) + SemanticQueryBuilder semanticQueryBuilder = createSemanticSubQuery( + indexInformation.getInferenceIndices(), + matchQueryBuilder.fieldName(), + (String) matchQueryBuilder.value() ); + semanticQueryBuilder.boost(matchQueryBuilder.boost()); + semanticQueryBuilder.queryName(matchQueryBuilder.queryName()); + boolQueryBuilder.should(semanticQueryBuilder); boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), matchQueryBuilder)); return boolQueryBuilder; } From 370931dc6554d345c0ee76ec4b6e46ff47c881bb Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 11 Jun 2025 14:27:13 -0400 Subject: [PATCH 03/49] fixing for match 
subquery --- .../queries/SemanticMatchQueryRewriteInterceptor.java | 8 ++++---- .../queries/SemanticQueryRewriteInterceptor.java | 8 ++++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java index 734311afb9090..d9a81161d99e9 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java @@ -54,13 +54,13 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( assert (queryBuilder instanceof MatchQueryBuilder); MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) queryBuilder; BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - SemanticQueryBuilder semanticQueryBuilder = createSemanticSubQuery( + QueryBuilder semanticQueryBuilder = createSemanticSubQuery( indexInformation.getInferenceIndices(), matchQueryBuilder.fieldName(), - (String) matchQueryBuilder.value() + (String) matchQueryBuilder.value(), + matchQueryBuilder.boost(), + matchQueryBuilder.queryName() ); - semanticQueryBuilder.boost(matchQueryBuilder.boost()); - semanticQueryBuilder.queryName(matchQueryBuilder.queryName()); boolQueryBuilder.should(semanticQueryBuilder); boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), matchQueryBuilder)); return boolQueryBuilder; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index bb76ef0be24e9..45e53f7b58f0b 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -114,9 +114,13 @@ protected QueryBuilder createSubQueryForIndices(Collection indices, Quer return boolQueryBuilder; } - protected QueryBuilder createSemanticSubQuery(Collection indices, String fieldName, String value) { + protected QueryBuilder createSemanticSubQuery(Collection indices, String fieldName, String value, Float boost, String queryName) { + SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder(fieldName, value, true); + semanticQueryBuilder.boost(boost); + semanticQueryBuilder.queryName(queryName); + BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - boolQueryBuilder.must(new SemanticQueryBuilder(fieldName, value, true)); + boolQueryBuilder.must(semanticQueryBuilder); boolQueryBuilder.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, indices)); return boolQueryBuilder; } From b85abda36731b147515a3f664f2cd50c828fe50c Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 11 Jun 2025 14:27:45 -0400 Subject: [PATCH 04/49] fix for sparse vector query boost --- ...icSparseVectorQueryRewriteInterceptor.java | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java index a35e83450c55a..7ddbdc9ae46cc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java @@ -101,18 +101,21 @@ protected 
QueryBuilder buildCombinedInferenceAndNonInferenceQuery( private QueryBuilder buildNestedQueryFromSparseVectorQuery(QueryBuilder queryBuilder, String searchInferenceId) { assert (queryBuilder instanceof SparseVectorQueryBuilder); SparseVectorQueryBuilder sparseVectorQueryBuilder = (SparseVectorQueryBuilder) queryBuilder; + SparseVectorQueryBuilder newSparseVectorQueryBuilder = new SparseVectorQueryBuilder( + SemanticTextField.getEmbeddingsFieldName(sparseVectorQueryBuilder.getFieldName()), + sparseVectorQueryBuilder.getQueryVectors(), + (sparseVectorQueryBuilder.getInferenceId() == null && sparseVectorQueryBuilder.getQuery() != null) + ? searchInferenceId + : sparseVectorQueryBuilder.getInferenceId(), + sparseVectorQueryBuilder.getQuery(), + sparseVectorQueryBuilder.shouldPruneTokens(), + sparseVectorQueryBuilder.getTokenPruningConfig() + ); + newSparseVectorQueryBuilder.boost(sparseVectorQueryBuilder.boost()); + newSparseVectorQueryBuilder.queryName(sparseVectorQueryBuilder.queryName()); return QueryBuilders.nestedQuery( SemanticTextField.getChunksFieldName(sparseVectorQueryBuilder.getFieldName()), - new SparseVectorQueryBuilder( - SemanticTextField.getEmbeddingsFieldName(sparseVectorQueryBuilder.getFieldName()), - sparseVectorQueryBuilder.getQueryVectors(), - (sparseVectorQueryBuilder.getInferenceId() == null && sparseVectorQueryBuilder.getQuery() != null) - ? 
searchInferenceId - : sparseVectorQueryBuilder.getInferenceId(), - sparseVectorQueryBuilder.getQuery(), - sparseVectorQueryBuilder.shouldPruneTokens(), - sparseVectorQueryBuilder.getTokenPruningConfig() - ), + newSparseVectorQueryBuilder, ScoreMode.Max ); } From 5db26868857a30ba057389ed8d166525f554b127 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 11 Jun 2025 14:28:38 -0400 Subject: [PATCH 05/49] fix linting issues --- .../queries/SemanticMatchQueryRewriteInterceptor.java | 6 +----- .../queries/SemanticQueryRewriteInterceptor.java | 8 +++++++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java index d9a81161d99e9..f76eb321e2fc9 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java @@ -36,11 +36,7 @@ protected String getQuery(QueryBuilder queryBuilder) { @Override protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) { - SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder( - indexInformation.fieldName(), - getQuery(queryBuilder), - false - ); + SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder(indexInformation.fieldName(), getQuery(queryBuilder), false); semanticQueryBuilder.boost(queryBuilder.boost()); semanticQueryBuilder.queryName(queryBuilder.queryName()); return semanticQueryBuilder; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index 45e53f7b58f0b..8a22f5d48f5f3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -114,7 +114,13 @@ protected QueryBuilder createSubQueryForIndices(Collection indices, Quer return boolQueryBuilder; } - protected QueryBuilder createSemanticSubQuery(Collection indices, String fieldName, String value, Float boost, String queryName) { + protected QueryBuilder createSemanticSubQuery( + Collection indices, + String fieldName, + String value, + Float boost, + String queryName + ) { SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder(fieldName, value, true); semanticQueryBuilder.boost(boost); semanticQueryBuilder.queryName(queryName); From 2ce691e425968790ff87847f8718c6cce777e700 Mon Sep 17 00:00:00 2001 From: Samiul Monir <150824886+Samiul-TheSoccerFan@users.noreply.github.com> Date: Wed, 11 Jun 2025 14:39:05 -0400 Subject: [PATCH 06/49] Update docs/changelog/129282.yaml --- docs/changelog/129282.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/129282.yaml diff --git a/docs/changelog/129282.yaml b/docs/changelog/129282.yaml new file mode 100644 index 0000000000000..775e48aa823cf --- /dev/null +++ b/docs/changelog/129282.yaml @@ -0,0 +1,5 @@ +pr: 129282 +summary: Fix Semantic Query Rewrite Interception Drops Boosts +area: Relevance +type: bug +issues: [] From 4100200a960f7fe615320ef4f8764502b880dc2f Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 11 Jun 2025 14:44:38 -0400 Subject: [PATCH 07/49] update changelog --- docs/changelog/129282.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/changelog/129282.yaml b/docs/changelog/129282.yaml index 
775e48aa823cf..f5d9b747861a9 100644 --- a/docs/changelog/129282.yaml +++ b/docs/changelog/129282.yaml @@ -1,5 +1,6 @@ pr: 129282 -summary: Fix Semantic Query Rewrite Interception Drops Boosts -area: Relevance +summary: Fix query rewrite logic to preserve `boosts` and `queryName` for `match`, `knn`, and `sparse_vector` queries on semantic_text fields +area: Search type: bug -issues: [] +issues: + - 128696 From 3406ae1bd286ee07d1a3e1c314a48f3fd3143193 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 12 Jun 2025 15:59:59 -0400 Subject: [PATCH 08/49] Copy constructor with match query --- .../SemanticMatchQueryRewriteInterceptor.java | 10 ++-------- .../inference/queries/SemanticQueryBuilder.java | 7 +++++++ .../queries/SemanticQueryRewriteInterceptor.java | 16 +++++++--------- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java index f76eb321e2fc9..8d94427db0b41 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java @@ -36,10 +36,7 @@ protected String getQuery(QueryBuilder queryBuilder) { @Override protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) { - SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder(indexInformation.fieldName(), getQuery(queryBuilder), false); - semanticQueryBuilder.boost(queryBuilder.boost()); - semanticQueryBuilder.queryName(queryBuilder.queryName()); - return semanticQueryBuilder; + return SemanticQueryBuilder.from(queryBuilder, indexInformation.fieldName(), getQuery(queryBuilder), false); } 
@Override @@ -52,10 +49,7 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); QueryBuilder semanticQueryBuilder = createSemanticSubQuery( indexInformation.getInferenceIndices(), - matchQueryBuilder.fieldName(), - (String) matchQueryBuilder.value(), - matchQueryBuilder.boost(), - matchQueryBuilder.queryName() + matchQueryBuilder ); boolQueryBuilder.should(semanticQueryBuilder); boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), matchQueryBuilder)); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index 182c083ef1c26..f49fc1636e710 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -109,6 +109,13 @@ public SemanticQueryBuilder(StreamInput in) throws IOException { } } + public static SemanticQueryBuilder from(QueryBuilder queryBuilder, String fieldName, String query, Boolean lenient) { + SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder(fieldName, query, lenient); + semanticQueryBuilder.boost(queryBuilder.boost()); + semanticQueryBuilder.queryName(queryBuilder.queryName()); + return semanticQueryBuilder; + } + @Override protected void doWriteTo(StreamOutput out) throws IOException { if (inferenceResultsSupplier != null) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index 8a22f5d48f5f3..b68eea5cdfb10 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -116,17 +116,15 @@ protected QueryBuilder createSubQueryForIndices(Collection indices, Quer protected QueryBuilder createSemanticSubQuery( Collection indices, - String fieldName, - String value, - Float boost, - String queryName + QueryBuilder queryBuilder ) { - SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder(fieldName, value, true); - semanticQueryBuilder.boost(boost); - semanticQueryBuilder.queryName(queryName); - BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - boolQueryBuilder.must(semanticQueryBuilder); + boolQueryBuilder.must(SemanticQueryBuilder.from( + queryBuilder, + getFieldName(queryBuilder), + getQuery(queryBuilder), + true + )); boolQueryBuilder.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, indices)); return boolQueryBuilder; } From d07952a03fe8510dcef35b8f0e9ef6f52069dbda Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 12 Jun 2025 16:10:24 -0400 Subject: [PATCH 09/49] util function to create sparseVectorBuilder for sparse query --- .../ml/search/SparseVectorQueryBuilder.java | 15 ++++++++++++ ...icSparseVectorQueryRewriteInterceptor.java | 24 +++++++++---------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java index f5815a3bfde23..3d6df347aecbf 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java @@ -145,6 +145,21 @@ private SparseVectorQueryBuilder(SparseVectorQueryBuilder other, 
SetOnce queryVectors, + @Nullable String inferenceId, + @Nullable String query, + @Nullable Boolean shouldPruneTokens, + @Nullable TokenPruningConfig tokenPruningConfig + ) { + SparseVectorQueryBuilder sparseVectorQueryBuilder = new SparseVectorQueryBuilder(fieldName, queryVectors, inferenceId, query, shouldPruneTokens, tokenPruningConfig); + sparseVectorQueryBuilder.boost(queryBuilder.boost()); + sparseVectorQueryBuilder.queryName(queryBuilder.queryName()); + return sparseVectorQueryBuilder; + } + public String getFieldName() { return fieldName; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java index 7ddbdc9ae46cc..f3006070479a7 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java @@ -101,21 +101,19 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( private QueryBuilder buildNestedQueryFromSparseVectorQuery(QueryBuilder queryBuilder, String searchInferenceId) { assert (queryBuilder instanceof SparseVectorQueryBuilder); SparseVectorQueryBuilder sparseVectorQueryBuilder = (SparseVectorQueryBuilder) queryBuilder; - SparseVectorQueryBuilder newSparseVectorQueryBuilder = new SparseVectorQueryBuilder( - SemanticTextField.getEmbeddingsFieldName(sparseVectorQueryBuilder.getFieldName()), - sparseVectorQueryBuilder.getQueryVectors(), - (sparseVectorQueryBuilder.getInferenceId() == null && sparseVectorQueryBuilder.getQuery() != null) - ? 
searchInferenceId - : sparseVectorQueryBuilder.getInferenceId(), - sparseVectorQueryBuilder.getQuery(), - sparseVectorQueryBuilder.shouldPruneTokens(), - sparseVectorQueryBuilder.getTokenPruningConfig() - ); - newSparseVectorQueryBuilder.boost(sparseVectorQueryBuilder.boost()); - newSparseVectorQueryBuilder.queryName(sparseVectorQueryBuilder.queryName()); return QueryBuilders.nestedQuery( SemanticTextField.getChunksFieldName(sparseVectorQueryBuilder.getFieldName()), - newSparseVectorQueryBuilder, + SparseVectorQueryBuilder.from( + queryBuilder, + SemanticTextField.getEmbeddingsFieldName(sparseVectorQueryBuilder.getFieldName()), + sparseVectorQueryBuilder.getQueryVectors(), + (sparseVectorQueryBuilder.getInferenceId() == null && sparseVectorQueryBuilder.getQuery() != null) + ? searchInferenceId + : sparseVectorQueryBuilder.getInferenceId(), + sparseVectorQueryBuilder.getQuery(), + sparseVectorQueryBuilder.shouldPruneTokens(), + sparseVectorQueryBuilder.getTokenPruningConfig() + ), ScoreMode.Max ); } From f13363238a0f35f6cad4e8e85c9c63bbd931f496 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 12 Jun 2025 17:39:21 -0400 Subject: [PATCH 10/49] util function for knn query to support boost --- .../search/vectors/KnnVectorQueryBuilder.java | 36 +++++++++++++++ ...anticKnnVectorQueryRewriteInterceptor.java | 46 +------------------ 2 files changed, 38 insertions(+), 44 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java b/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java index 87f9a50c64c17..fc7a24eba25b0 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java @@ -265,6 +265,42 @@ public KnnVectorQueryBuilder(StreamInput in) throws IOException { this.queryVectorSupplier = null; } + public static KnnVectorQueryBuilder from( + KnnVectorQueryBuilder queryBuilder 
+ ) { + return from(queryBuilder, queryBuilder.getFieldName(), queryBuilder.queryVectorBuilder()); + } + + public static KnnVectorQueryBuilder from( + KnnVectorQueryBuilder queryBuilder, + String fieldName, + QueryVectorBuilder queryVectorBuilder + ) { + KnnVectorQueryBuilder knnVectorQueryBuilder; + if (queryBuilder.queryVectorBuilder() != null) { + knnVectorQueryBuilder = new KnnVectorQueryBuilder( + fieldName, + queryVectorBuilder, + queryBuilder.k(), + queryBuilder.numCands(), + queryBuilder.getVectorSimilarity() + ); + } else { + knnVectorQueryBuilder = new KnnVectorQueryBuilder( + fieldName, + queryBuilder.queryVector(), + queryBuilder.k(), + queryBuilder.numCands(), + queryBuilder.rescoreVectorBuilder(), + queryBuilder.getVectorSimilarity() + ); + } + + knnVectorQueryBuilder.boost(queryBuilder.boost()); + knnVectorQueryBuilder.queryName(queryBuilder.queryName()); + return knnVectorQueryBuilder; + } + public String getFieldName() { return fieldName; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java index 7fc7c296c9e25..ffda854b42f35 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java @@ -128,28 +128,7 @@ private QueryBuilder buildNestedQueryFromKnnVectorQuery( } private KnnVectorQueryBuilder addIndexFilterToKnnVectorQuery(Collection indices, KnnVectorQueryBuilder original) { - KnnVectorQueryBuilder copy; - if (original.queryVectorBuilder() != null) { - copy = new KnnVectorQueryBuilder( - original.getFieldName(), - original.queryVectorBuilder(), - original.k(), - original.numCands(), - original.getVectorSimilarity() - ); - } else 
{ - copy = new KnnVectorQueryBuilder( - original.getFieldName(), - original.queryVector(), - original.k(), - original.numCands(), - original.rescoreVectorBuilder(), - original.getVectorSimilarity() - ); - } - - copy.boost(original.boost()); - copy.queryName(original.queryName()); + KnnVectorQueryBuilder copy = KnnVectorQueryBuilder.from(original); copy.addFilterQueries(original.filterQueries()); copy.addFilterQuery(new TermsQueryBuilder(IndexFieldMapper.NAME, indices)); return copy; @@ -169,28 +148,7 @@ private KnnVectorQueryBuilder buildNewKnnVectorQuery( KnnVectorQueryBuilder original, QueryVectorBuilder queryVectorBuilder ) { - KnnVectorQueryBuilder newQueryBuilder; - if (original.queryVectorBuilder() != null) { - newQueryBuilder = new KnnVectorQueryBuilder( - fieldName, - queryVectorBuilder, - original.k(), - original.numCands(), - original.getVectorSimilarity() - ); - } else { - newQueryBuilder = new KnnVectorQueryBuilder( - fieldName, - original.queryVector(), - original.k(), - original.numCands(), - original.rescoreVectorBuilder(), - original.getVectorSimilarity() - ); - } - - newQueryBuilder.boost(original.boost()); - newQueryBuilder.queryName(original.queryName()); + KnnVectorQueryBuilder newQueryBuilder = KnnVectorQueryBuilder.from(original, fieldName, queryVectorBuilder); newQueryBuilder.addFilterQueries(original.filterQueries()); return newQueryBuilder; } From a9048f0233f68afb5d36bcb03b96fbd8fff12e91 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 12 Jun 2025 18:46:23 -0400 Subject: [PATCH 11/49] adding unit tests for all intercepted query terms --- ...KnnVectorQueryRewriteInterceptorTests.java | 23 ++++++++++++++ ...nticMatchQueryRewriteInterceptorTests.java | 30 ++++++++++++++++++ ...rseVectorQueryRewriteInterceptorTests.java | 31 +++++++++++++++++++ 3 files changed, 84 insertions(+) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java index 270cdba6d3469..f970df44ff06d 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java @@ -64,6 +64,29 @@ public void testKnnQueryWithVectorBuilderIsInterceptedAndRewritten() throws IOEx testRewrittenInferenceQuery(context, original); } + public void testKnnQueryWithVectorBuilderIsInterceptedAndRewrittenWithBoostAndQueryName() throws IOException { + float BOOST = 2.0f; + String QUERY_NAME = "knn_query"; + + Map inferenceFields = Map.of( + FIELD_NAME, + new InferenceFieldMetadata(index.getName(), INFERENCE_ID, new String[] { FIELD_NAME }, null) + ); + QueryRewriteContext context = createQueryRewriteContext(inferenceFields); + QueryVectorBuilder queryVectorBuilder = new TextEmbeddingQueryVectorBuilder(INFERENCE_ID, QUERY); + KnnVectorQueryBuilder original = new KnnVectorQueryBuilder(FIELD_NAME, queryVectorBuilder, 10, 100, null); + original.boost(BOOST); + original.queryName(QUERY_NAME); + + testRewrittenInferenceQuery(context, original); + QueryBuilder rewritten = original.rewrite(context); + InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; + NestedQueryBuilder nestedQueryBuilder = (NestedQueryBuilder) intercepted.queryBuilder; + KnnVectorQueryBuilder knnVectorQueryBuilder = (KnnVectorQueryBuilder) nestedQueryBuilder.query(); + assertEquals(BOOST, knnVectorQueryBuilder.boost(), 0.0f); + assertEquals(QUERY_NAME, knnVectorQueryBuilder.queryName()); + } + public void testKnnWithQueryBuilderWithoutInferenceIdIsInterceptedAndRewritten() throws IOException { Map inferenceFields = Map.of( FIELD_NAME, diff --git 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java index 6987ef33ed63d..c7ea66fe3de83 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java @@ -36,6 +36,8 @@ public class SemanticMatchQueryRewriteInterceptorTests extends ESTestCase { private static final String FIELD_NAME = "fieldName"; private static final String VALUE = "value"; + private static final String QUERY_NAME = "match_query"; + private static final float BOOST = 2.0f; @Before public void setup() { @@ -79,10 +81,38 @@ public void testMatchQueryOnNonInferenceFieldRemainsMatchQuery() throws IOExcept assertEquals(original, rewritten); } + public void testBoostInMatchQueryRewrite() throws IOException { + Map inferenceFields = Map.of( + FIELD_NAME, + new InferenceFieldMetadata(index.getName(), "inferenceId", new String[] { FIELD_NAME }, null) + ); + QueryRewriteContext context = createQueryRewriteContext(inferenceFields); + QueryBuilder original = createTestQueryBuilderWithBoost(); + QueryBuilder rewritten = original.rewrite(context); + assertTrue( + "Expected query to be intercepted, but was [" + rewritten.getClass().getName() + "]", + rewritten instanceof InterceptedQueryBuilderWrapper + ); + InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; + assertTrue(intercepted.queryBuilder instanceof SemanticQueryBuilder); + SemanticQueryBuilder semanticQueryBuilder = (SemanticQueryBuilder) intercepted.queryBuilder; + assertEquals(FIELD_NAME, semanticQueryBuilder.getFieldName()); + assertEquals(VALUE, semanticQueryBuilder.getQuery()); + assertEquals(BOOST, semanticQueryBuilder.boost(), 0.0f); + 
assertEquals(QUERY_NAME, semanticQueryBuilder.queryName()); + } + private MatchQueryBuilder createTestQueryBuilder() { return new MatchQueryBuilder(FIELD_NAME, VALUE); } + private MatchQueryBuilder createTestQueryBuilderWithBoost() { + MatchQueryBuilder queryBuilder = createTestQueryBuilder(); + queryBuilder.boost(BOOST); + queryBuilder.queryName(QUERY_NAME); + return queryBuilder; + } + private QueryRewriteContext createQueryRewriteContext(Map inferenceFields) { IndexMetadata indexMetadata = IndexMetadata.builder(index.getName()) .settings( diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java index 075955766a0a9..07c0e7ab2979a 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java @@ -110,6 +110,37 @@ public void testSparseVectorQueryOnNonInferenceFieldRemainsUnchanged() throws IO assertEquals(original, rewritten); } + public void testBoostAndQueryNameOnSparseVectorQueryRewrite() throws IOException { + float BOOST = 2.0f; + String QUERY_NAME = "sparse_vector_query"; + + Map inferenceFields = Map.of( + FIELD_NAME, + new InferenceFieldMetadata(index.getName(), "inferenceId", new String[] { FIELD_NAME }, null) + ); + QueryRewriteContext context = createQueryRewriteContext(inferenceFields); + QueryBuilder original = new SparseVectorQueryBuilder(FIELD_NAME, INFERENCE_ID, QUERY); + original.boost(BOOST); + original.queryName(QUERY_NAME); + QueryBuilder rewritten = original.rewrite(context); + assertTrue( + "Expected query to be intercepted, but was [" + rewritten.getClass().getName() + "]", + rewritten instanceof InterceptedQueryBuilderWrapper + ); + 
InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; + assertTrue(intercepted.queryBuilder instanceof NestedQueryBuilder); + NestedQueryBuilder nestedQueryBuilder = (NestedQueryBuilder) intercepted.queryBuilder; + assertEquals(SemanticTextField.getChunksFieldName(FIELD_NAME), nestedQueryBuilder.path()); + QueryBuilder innerQuery = nestedQueryBuilder.query(); + assertTrue(innerQuery instanceof SparseVectorQueryBuilder); + SparseVectorQueryBuilder sparseVectorQueryBuilder = (SparseVectorQueryBuilder) innerQuery; + assertEquals(SemanticTextField.getEmbeddingsFieldName(FIELD_NAME), sparseVectorQueryBuilder.getFieldName()); + assertEquals(INFERENCE_ID, sparseVectorQueryBuilder.getInferenceId()); + assertEquals(QUERY, sparseVectorQueryBuilder.getQuery()); + assertEquals(BOOST, sparseVectorQueryBuilder.boost(), 0.0f); + assertEquals(QUERY_NAME, sparseVectorQueryBuilder.queryName()); + } + private QueryRewriteContext createQueryRewriteContext(Map inferenceFields) { IndexMetadata indexMetadata = IndexMetadata.builder(index.getName()) .settings( From 5a1dab92db714663dcbf8d0f9f9099a2e6e08aaf Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 13 Jun 2025 02:49:01 -0400 Subject: [PATCH 12/49] Adding yaml test for match,sparse, and knn --- .../xpack/inference/InferenceFeatures.java | 4 +- .../test/inference/45_semantic_text_match.yml | 38 +++++++++++++++++++ .../46_semantic_text_sparse_vector.yml | 29 ++++++++++++++ .../test/inference/47_semantic_text_knn.yml | 32 ++++++++++++++++ 4 files changed, 102 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index 669e29ba7debf..a3e3bd8993047 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -35,6 +35,7 @@ public class InferenceFeatures implements FeatureSpecification { "test_rule_retriever.with_indices_that_dont_return_rank_docs" ); private static final NodeFeature SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER = new NodeFeature("semantic_text.match_all_highlighter"); + private static final NodeFeature SEMANTIC_TEXT_QUERY_REWRITE_BOOST_AND_QUERY_NAME_FIX = new NodeFeature("semantic_text.query_rewrite.boost_and_query_name_fix"); @Override public Set getTestFeatures() { @@ -59,7 +60,8 @@ public Set getTestFeatures() { SemanticTextFieldMapper.SEMANTIC_TEXT_HANDLE_EMPTY_INPUT, TEST_RULE_RETRIEVER_WITH_INDICES_THAT_DONT_RETURN_RANK_DOCS, SEMANTIC_TEXT_SUPPORT_CHUNKING_CONFIG, - SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER + SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER, + SEMANTIC_TEXT_QUERY_REWRITE_BOOST_AND_QUERY_NAME_FIX ); } } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml index 28093ba49e6cc..bf88a8a7de3d3 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml @@ -277,3 +277,41 @@ setup: query: "inference test" - match: { hits.total.value: 0 } + +--- +"Apply boost and query name": + - requires: + cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" + reason: fix boosting and query name for semantic text match queries. 
+ + - skip: + features: [ "headers", "close_to" ] + + - do: + index: + index: test-sparse-index + id: doc_1 + body: + inference_field: [ "inference test", "another inference test" ] + non_inference_field: "non inference test" + refresh: true + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-sparse-index + body: + query: + match: + inference_field: + query: "inference test" + boost: 5.0 + _name: i-like-naming-my-queries + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 136.1608, error: 1e13 } } + - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } + diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml index f1cff512fd209..bdf66ec1555b8 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml @@ -247,3 +247,32 @@ setup: - match: { hits.total.value: 2 } +--- +"Apply boost and query name": + - requires: + cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" + reason: fix boosting and query name for semantic text sparse vector queries. 
+ + - skip: + features: [ "headers", "close_to" ] + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-semantic-text-index + body: + query: + sparse_vector: + field: inference_field + query: "inference test" + boost: 5.0 + _name: i-like-naming-my-queries + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 136.1608, error: 1e13 } } + - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } + + diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml index 64ecb0f2d882c..a9dd7ffcaf7b9 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml @@ -404,4 +404,36 @@ setup: - match: { hits.total.value: 4 } +--- +"Apply boost and query name": + - requires: + cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" + reason: fix boosting and query name for semantic text knn queries. 
+ + - skip: + features: [ "headers", "close_to" ] + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-semantic-text-index + body: + query: + knn: + field: inference_field + k: 2 + num_candidates: 100 + query_vector_builder: + text_embedding: + model_text: test + boost: 5.0 + _name: i-like-naming-my-queries + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 4.7153287, error: 1e13 } } + - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } + From 6cef4419bbdbfc44ee4307e2188d4da9646087ed Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 13 Jun 2025 02:49:31 -0400 Subject: [PATCH 13/49] Adding queryname support for nested query --- .../org/elasticsearch/index/query/NestedQueryBuilder.java | 4 ++++ .../java/org/elasticsearch/index/query/QueryBuilders.java | 4 ++++ .../queries/SemanticKnnVectorQueryRewriteInterceptor.java | 3 ++- .../queries/SemanticSparseVectorQueryRewriteInterceptor.java | 3 ++- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java index 34c5ede62a656..657273ed8583f 100644 --- a/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java @@ -84,6 +84,10 @@ private NestedQueryBuilder(String path, QueryBuilder query, ScoreMode scoreMode, this.innerHitBuilder = innerHitBuilder; } + public static NestedQueryBuilder from(String path, QueryBuilder query, ScoreMode scoreMode, String queryName) { + return new NestedQueryBuilder(path, query, scoreMode).queryName(queryName); + } + /** * Read from a stream. 
*/ diff --git a/server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java b/server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java index 66f1c9a74d4c2..a0db2f8e23426 100644 --- a/server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java +++ b/server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java @@ -484,6 +484,10 @@ public static NestedQueryBuilder nestedQuery(String path, QueryBuilder query, Sc return new NestedQueryBuilder(path, query, scoreMode); } + public static NestedQueryBuilder nestedQuery(String path, QueryBuilder query, ScoreMode scoreMode, String queryName) { + return NestedQueryBuilder.from(path, query, scoreMode, queryName); + } + /** * A filter for a field based on several terms matching on any of them. * diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java index ffda854b42f35..8703f8f8436f4 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java @@ -123,7 +123,8 @@ private QueryBuilder buildNestedQueryFromKnnVectorQuery( filteredKnnVectorQueryBuilder, queryVectorBuilder ), - ScoreMode.Max + ScoreMode.Max, + filteredKnnVectorQueryBuilder.queryName() ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java index f3006070479a7..6fc92fbd2d683 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java @@ -114,7 +114,8 @@ private QueryBuilder buildNestedQueryFromSparseVectorQuery(QueryBuilder queryBui sparseVectorQueryBuilder.shouldPruneTokens(), sparseVectorQueryBuilder.getTokenPruningConfig() ), - ScoreMode.Max + ScoreMode.Max, + queryBuilder.queryName() ); } From faa35ea81da21f8e9c437cc091c8872c3713cb1e Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 13 Jun 2025 02:51:35 -0400 Subject: [PATCH 14/49] fix code styles --- .../search/vectors/KnnVectorQueryBuilder.java | 10 ++-------- .../core/ml/search/SparseVectorQueryBuilder.java | 9 ++++++++- .../xpack/inference/InferenceFeatures.java | 4 +++- .../SemanticMatchQueryRewriteInterceptor.java | 5 +---- .../queries/SemanticQueryRewriteInterceptor.java | 12 ++---------- 5 files changed, 16 insertions(+), 24 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java b/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java index fc7a24eba25b0..ae9e4f4ca41ef 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java @@ -265,17 +265,11 @@ public KnnVectorQueryBuilder(StreamInput in) throws IOException { this.queryVectorSupplier = null; } - public static KnnVectorQueryBuilder from( - KnnVectorQueryBuilder queryBuilder - ) { + public static KnnVectorQueryBuilder from(KnnVectorQueryBuilder queryBuilder) { return from(queryBuilder, queryBuilder.getFieldName(), queryBuilder.queryVectorBuilder()); } - public static KnnVectorQueryBuilder from( - KnnVectorQueryBuilder queryBuilder, - String fieldName, - QueryVectorBuilder queryVectorBuilder - ) { + public static 
KnnVectorQueryBuilder from(KnnVectorQueryBuilder queryBuilder, String fieldName, QueryVectorBuilder queryVectorBuilder) { KnnVectorQueryBuilder knnVectorQueryBuilder; if (queryBuilder.queryVectorBuilder() != null) { knnVectorQueryBuilder = new KnnVectorQueryBuilder( diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java index 3d6df347aecbf..3ae796b1133c4 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java @@ -154,7 +154,14 @@ public static SparseVectorQueryBuilder from( @Nullable Boolean shouldPruneTokens, @Nullable TokenPruningConfig tokenPruningConfig ) { - SparseVectorQueryBuilder sparseVectorQueryBuilder = new SparseVectorQueryBuilder(fieldName, queryVectors, inferenceId, query, shouldPruneTokens, tokenPruningConfig); + SparseVectorQueryBuilder sparseVectorQueryBuilder = new SparseVectorQueryBuilder( + fieldName, + queryVectors, + inferenceId, + query, + shouldPruneTokens, + tokenPruningConfig + ); sparseVectorQueryBuilder.boost(queryBuilder.boost()); sparseVectorQueryBuilder.queryName(queryBuilder.queryName()); return sparseVectorQueryBuilder; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index a3e3bd8993047..efca57b4f2a42 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -35,7 +35,9 @@ public class InferenceFeatures implements FeatureSpecification { "test_rule_retriever.with_indices_that_dont_return_rank_docs" ); 
private static final NodeFeature SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER = new NodeFeature("semantic_text.match_all_highlighter"); - private static final NodeFeature SEMANTIC_TEXT_QUERY_REWRITE_BOOST_AND_QUERY_NAME_FIX = new NodeFeature("semantic_text.query_rewrite.boost_and_query_name_fix"); + private static final NodeFeature SEMANTIC_TEXT_QUERY_REWRITE_BOOST_AND_QUERY_NAME_FIX = new NodeFeature( + "semantic_text.query_rewrite.boost_and_query_name_fix" + ); @Override public Set getTestFeatures() { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java index 8d94427db0b41..5721ccd73d5cb 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java @@ -47,10 +47,7 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( assert (queryBuilder instanceof MatchQueryBuilder); MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) queryBuilder; BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - QueryBuilder semanticQueryBuilder = createSemanticSubQuery( - indexInformation.getInferenceIndices(), - matchQueryBuilder - ); + QueryBuilder semanticQueryBuilder = createSemanticSubQuery(indexInformation.getInferenceIndices(), matchQueryBuilder); boolQueryBuilder.should(semanticQueryBuilder); boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), matchQueryBuilder)); return boolQueryBuilder; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index b68eea5cdfb10..d2936f3542d11 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -114,17 +114,9 @@ protected QueryBuilder createSubQueryForIndices(Collection indices, Quer return boolQueryBuilder; } - protected QueryBuilder createSemanticSubQuery( - Collection indices, - QueryBuilder queryBuilder - ) { + protected QueryBuilder createSemanticSubQuery(Collection indices, QueryBuilder queryBuilder) { BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - boolQueryBuilder.must(SemanticQueryBuilder.from( - queryBuilder, - getFieldName(queryBuilder), - getQuery(queryBuilder), - true - )); + boolQueryBuilder.must(SemanticQueryBuilder.from(queryBuilder, getFieldName(queryBuilder), getQuery(queryBuilder), true)); boolQueryBuilder.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, indices)); return boolQueryBuilder; } From 13e791eacd0cd5304455c732270e8caec5d6b93e Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 13 Jun 2025 03:53:14 -0400 Subject: [PATCH 15/49] Fix failed yaml tests --- .../rest-api-spec/test/inference/45_semantic_text_match.yml | 2 +- .../test/inference/46_semantic_text_sparse_vector.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml index bf88a8a7de3d3..0ff2a43f8f691 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml +++ 
b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml @@ -312,6 +312,6 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 136.1608, error: 1e13 } } + - close_to: { hits.hits.0._score: { value: 1.8918664E18, error: 1e15 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml index bdf66ec1555b8..160ac7738db48 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml @@ -272,7 +272,7 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 136.1608, error: 1e13 } } + - close_to: { hits.hits.0._score: { value: 1.8918664E18, error: 1e15 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } From 3a5a30fcbf3b9b2a3251efc06da58c1a0f8e3beb Mon Sep 17 00:00:00 2001 From: Samiul Monir <150824886+Samiul-TheSoccerFan@users.noreply.github.com> Date: Fri, 13 Jun 2025 03:54:07 -0400 Subject: [PATCH 16/49] Update docs/changelog/129282.yaml --- docs/changelog/129282.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/changelog/129282.yaml b/docs/changelog/129282.yaml index f5d9b747861a9..75e56899ee23e 100644 --- a/docs/changelog/129282.yaml +++ b/docs/changelog/129282.yaml @@ -1,6 +1,6 @@ pr: 129282 -summary: Fix query rewrite logic to preserve `boosts` and `queryName` for `match`, `knn`, and `sparse_vector` queries on semantic_text fields +summary: "Fix query rewrite logic to preserve 
`boosts` and `queryName` for `match`,\ + \ `knn`, and `sparse_vector` queries on semantic_text fields" area: Search type: bug -issues: - - 128696 +issues: [] From 016e4487e0e16e5a09f256d04be8abd3a6f43344 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Mon, 16 Jun 2025 10:55:40 -0400 Subject: [PATCH 17/49] update yaml tests to expand test scenarios --- .../test/inference/45_semantic_text_match.yml | 42 ++++++++++++++--- .../46_semantic_text_sparse_vector.yml | 44 ++++++++++++++++-- .../test/inference/47_semantic_text_knn.yml | 46 ++++++++++++++++++- 3 files changed, 119 insertions(+), 13 deletions(-) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml index 0ff2a43f8f691..eb611983c1ce3 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml @@ -287,13 +287,25 @@ setup: - skip: features: [ "headers", "close_to" ] + - do: + indices.create: + index: test-sparse-index-random + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + inference_field: + type: semantic_text + inference_id: sparse-inference-id + - do: index: - index: test-sparse-index + index: test-sparse-index-random id: doc_1 body: - inference_field: [ "inference test", "another inference test" ] - non_inference_field: "non inference test" + inference_field: [ "It was a beautiful game", "Very competitive" ] refresh: true - do: @@ -301,17 +313,33 @@ setup: # Force JSON content type so that we use a parser that interprets the floating-point score as a double Content-Type: application/json search: - index: test-sparse-index + index: test-sparse-index-random body: query: match: inference_field: - query: "inference test" + 
query: "soccer" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 5.700229E18, error: 1e15 } } + - not_exists: hits.hits.0.matched_queries + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-sparse-index-random + body: + query: + match: + inference_field: + query: "soccer" boost: 5.0 _name: i-like-naming-my-queries - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 1.8918664E18, error: 1e15 } } + - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e15 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } - diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml index 160ac7738db48..fee37abd2938c 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml @@ -256,6 +256,27 @@ setup: - skip: features: [ "headers", "close_to" ] + - do: + indices.create: + index: test-sparse-index-random + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + inference_field: + type: semantic_text + inference_id: sparse-inference-id + + - do: + index: + index: test-sparse-index-random + id: doc_1 + body: + inference_field: [ "It was a beautiful game", "Very competitive" ] + refresh: true + - do: headers: # Force JSON content type so that we use a parser that interprets the floating-point score as a double @@ -266,13 +287,28 @@ setup: query: sparse_vector: field: inference_field - 
query: "inference test" + query: "soccer" + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 6.9606755E18, error: 1e15 } } + - not_exists: hits.hits.0.matched_queries + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-sparse-index-random + body: + query: + sparse_vector: + field: inference_field + query: "soccer" boost: 5.0 _name: i-like-naming-my-queries - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 1.8918664E18, error: 1e15 } } + - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e15 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } - - diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml index a9dd7ffcaf7b9..377700f2d22bc 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml @@ -413,6 +413,27 @@ setup: - skip: features: [ "headers", "close_to" ] + - do: + indices.create: + index: test-sparse-index-random + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + inference_field: + type: semantic_text + inference_id: dense-inference-id + + - do: + index: + index: test-sparse-index-random + id: doc_1 + body: + inference_field: [ "It was a beautiful game", "Very competitive" ] + refresh: true + - do: headers: # Force JSON content type so that we use a parser that interprets the floating-point score as a double @@ -427,13 +448,34 @@ setup: num_candidates: 100 query_vector_builder: 
text_embedding: - model_text: test + model_text: soccer + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 0.9984111, error: 1e15 } } + - not_exists: hits.hits.0.matched_queries + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: test-sparse-index-random + body: + query: + knn: + field: inference_field + k: 2 + num_candidates: 100 + query_vector_builder: + text_embedding: + model_text: soccer boost: 5.0 _name: i-like-naming-my-queries - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 4.7153287, error: 1e13 } } + - close_to: { hits.hits.0._score: { value: 4.9907494, error: 1e15 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } From efcf9c42e5d0733783b356ff6938bccc61a89864 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 25 Jun 2025 11:03:57 -0400 Subject: [PATCH 18/49] Updating knn to copy constructor --- .../search/vectors/KnnVectorQueryBuilder.java | 40 +++++++------------ ...anticKnnVectorQueryRewriteInterceptor.java | 7 +--- 2 files changed, 16 insertions(+), 31 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java b/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java index ae9e4f4ca41ef..fd59506af3bff 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java @@ -265,34 +265,22 @@ public KnnVectorQueryBuilder(StreamInput in) throws IOException { this.queryVectorSupplier = null; } - public static KnnVectorQueryBuilder from(KnnVectorQueryBuilder queryBuilder) { - return from(queryBuilder, queryBuilder.getFieldName(), queryBuilder.queryVectorBuilder()); + public 
KnnVectorQueryBuilder(KnnVectorQueryBuilder queryBuilder) { + this(queryBuilder, queryBuilder.getFieldName(), queryBuilder.queryVectorBuilder()); } - public static KnnVectorQueryBuilder from(KnnVectorQueryBuilder queryBuilder, String fieldName, QueryVectorBuilder queryVectorBuilder) { - KnnVectorQueryBuilder knnVectorQueryBuilder; - if (queryBuilder.queryVectorBuilder() != null) { - knnVectorQueryBuilder = new KnnVectorQueryBuilder( - fieldName, - queryVectorBuilder, - queryBuilder.k(), - queryBuilder.numCands(), - queryBuilder.getVectorSimilarity() - ); - } else { - knnVectorQueryBuilder = new KnnVectorQueryBuilder( - fieldName, - queryBuilder.queryVector(), - queryBuilder.k(), - queryBuilder.numCands(), - queryBuilder.rescoreVectorBuilder(), - queryBuilder.getVectorSimilarity() - ); - } - - knnVectorQueryBuilder.boost(queryBuilder.boost()); - knnVectorQueryBuilder.queryName(queryBuilder.queryName()); - return knnVectorQueryBuilder; + public KnnVectorQueryBuilder(KnnVectorQueryBuilder queryBuilder, String fieldName, QueryVectorBuilder queryVectorBuilder) { + this(fieldName, + queryBuilder.queryVector(), + queryVectorBuilder, + null, + queryBuilder.k(), + queryBuilder.numCands(), + queryBuilder.rescoreVectorBuilder(), + queryBuilder.getVectorSimilarity()); + this.boost = queryBuilder.boost(); + this.queryName = queryBuilder.queryName(); + this.filterQueries.addAll(queryBuilder.filterQueries()); } public String getFieldName() { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java index 8703f8f8436f4..e6617abde7404 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java @@ -129,8 +129,7 @@ private QueryBuilder buildNestedQueryFromKnnVectorQuery( } private KnnVectorQueryBuilder addIndexFilterToKnnVectorQuery(Collection indices, KnnVectorQueryBuilder original) { - KnnVectorQueryBuilder copy = KnnVectorQueryBuilder.from(original); - copy.addFilterQueries(original.filterQueries()); + KnnVectorQueryBuilder copy = new KnnVectorQueryBuilder(original); copy.addFilterQuery(new TermsQueryBuilder(IndexFieldMapper.NAME, indices)); return copy; } @@ -149,9 +148,7 @@ private KnnVectorQueryBuilder buildNewKnnVectorQuery( KnnVectorQueryBuilder original, QueryVectorBuilder queryVectorBuilder ) { - KnnVectorQueryBuilder newQueryBuilder = KnnVectorQueryBuilder.from(original, fieldName, queryVectorBuilder); - newQueryBuilder.addFilterQueries(original.filterQueries()); - return newQueryBuilder; + return new KnnVectorQueryBuilder(original, fieldName, queryVectorBuilder); } @Override From f4492990997afd36cc06c76a1fe26b0fa47b2ee2 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 27 Jun 2025 13:42:03 -0400 Subject: [PATCH 19/49] adding yaml tests for multiple indices --- .../test/inference/45_semantic_text_match.yml | 91 ++++++++++++++- .../46_semantic_text_sparse_vector.yml | 97 +++++++++++++++- .../test/inference/47_semantic_text_knn.yml | 109 +++++++++++++++++- 3 files changed, 288 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml index eb611983c1ce3..76fa6f24283c0 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml @@ -279,7 +279,7 @@ 
setup: - match: { hits.total.value: 0 } --- -"Apply boost and query name": +"Apply boost and query name on single index": - requires: cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" reason: fix boosting and query name for semantic text match queries. @@ -343,3 +343,92 @@ setup: - match: { hits.hits.0._id: "doc_1" } - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e15 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } + +--- +"Apply boost and query name on multiple indices": + - requires: + cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" + reason: fix boosting and query name for semantic text match queries. + + - skip: + features: [ "headers", "close_to" ] + + - do: + indices.create: + index: index-with-semantic-field + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + inference_field: + type: semantic_text + inference_id: sparse-inference-id + + - do: + indices.create: + index: index-with-text-field + body: + mappings: + properties: + inference_field: + type: text + + - do: + index: + index: index-with-semantic-field + id: doc_1 + body: + inference_field: [ "It was a beautiful game", "Very competitive" ] + refresh: true + + - do: + index: + index: index-with-text-field + id: doc_1 + body: + inference_field: [ "It was a beautiful game", "Very competitive" ] + refresh: true + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: index-with-semantic-field,index-with-text-field + body: + query: + match: + inference_field: + query: "beautiful" + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "doc_1" } + - match: { hits.hits.1._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 1.1140361E19, error: 1e15 } } + - not_exists: hits.hits.0.matched_queries + - close_to: { hits.hits.1._score: { value: 
0.2876821, error: 1e15 } } + - not_exists: hits.hits.1.matched_queries + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: index-with-semantic-field,index-with-text-field + body: + query: + match: + inference_field: + query: "beautiful" + boost: 5.0 + _name: i-like-naming-my-queries + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "doc_1" } + - match: { hits.hits.1._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 5.5701804E19, error: 1e15 } } + - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } + - close_to: { hits.hits.1._score: { value: 1.4384103, error: 1e15 } } + - match: { hits.hits.1.matched_queries: [ "i-like-naming-my-queries" ] } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml index fee37abd2938c..b3f653f070dff 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml @@ -248,7 +248,7 @@ setup: - match: { hits.total.value: 2 } --- -"Apply boost and query name": +"Apply boost and query name on single index": - requires: cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" reason: fix boosting and query name for semantic text sparse vector queries. 
@@ -282,7 +282,7 @@ setup: # Force JSON content type so that we use a parser that interprets the floating-point score as a double Content-Type: application/json search: - index: test-semantic-text-index + index: test-sparse-index-random body: query: sparse_vector: @@ -291,7 +291,7 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 6.9606755E18, error: 1e15 } } + - close_to: { hits.hits.0._score: { value: 5.700229E18, error: 1e15 } } - not_exists: hits.hits.0.matched_queries - do: @@ -312,3 +312,94 @@ setup: - match: { hits.hits.0._id: "doc_1" } - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e15 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } + +--- +"Apply boost and query name on multiple indices": + - requires: + cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" + reason: fix boosting and query name for semantic text sparse vector queries. + + - skip: + features: [ "headers", "close_to" ] + + - do: + indices.create: + index: index-with-semantic-field + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + inference_field: + type: semantic_text + inference_id: sparse-inference-id + + - do: + indices.create: + index: index-with-sparse-field + body: + mappings: + properties: + inference_field: + type: sparse_vector + + - do: + index: + index: index-with-semantic-field + id: doc_1 + body: + inference_field: [ "It was a beautiful game", "Very competitive" ] + refresh: true + + - do: + index: + index: index-with-sparse-field + id: doc_1 + body: + inference_field: { "feature_0": 1, "feature_1": 2, "feature_2": 3, "feature_3": 4, "feature_4": 5 } + refresh: true + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: index-with-semantic-field,index-with-sparse-field + 
body: + query: + sparse_vector: + field: inference_field + query: "soccer" + inference_id: sparse-inference-id + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "doc_1" } + - match: { hits.hits.1._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 5.700229E18, error: 1e15 } } + - not_exists: hits.hits.0.matched_queries + - close_to: { hits.hits.1._score: { value: 1.3455845E10, error: 1e15 } } + - not_exists: hits.hits.1.matched_queries + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: index-with-semantic-field,index-with-sparse-field + body: + query: + sparse_vector: + field: inference_field + query: "soccer" + inference_id: sparse-inference-id + boost: 5.0 + _name: i-like-naming-my-queries + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "doc_1" } + - match: { hits.hits.1._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e15 } } + - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } + - close_to: { hits.hits.1._score: { value: 6.7279225E10, error: 1e15 } } + - match: { hits.hits.1.matched_queries: [ "i-like-naming-my-queries" ] } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml index 377700f2d22bc..834471229bb88 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml @@ -405,7 +405,7 @@ setup: - match: { hits.total.value: 4 } --- -"Apply boost and query name": +"Apply boost and query name on single index": - requires: cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" reason: fix 
boosting and query name for semantic text knn queries. @@ -415,7 +415,7 @@ setup: - do: indices.create: - index: test-sparse-index-random + index: test-dense-index-random body: settings: number_of_shards: 1 @@ -428,7 +428,7 @@ setup: - do: index: - index: test-sparse-index-random + index: test-dense-index-random id: doc_1 body: inference_field: [ "It was a beautiful game", "Very competitive" ] @@ -439,7 +439,7 @@ setup: # Force JSON content type so that we use a parser that interprets the floating-point score as a double Content-Type: application/json search: - index: test-semantic-text-index + index: test-dense-index-random body: query: knn: @@ -460,7 +460,7 @@ setup: # Force JSON content type so that we use a parser that interprets the floating-point score as a double Content-Type: application/json search: - index: test-sparse-index-random + index: test-dense-index-random body: query: knn: @@ -478,4 +478,103 @@ setup: - close_to: { hits.hits.0._score: { value: 4.9907494, error: 1e15 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } +--- +"Apply boost and query name on multiple indices": + - requires: + cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" + reason: fix boosting and query name for semantic text knn queries. 
+ + - skip: + features: [ "headers", "close_to" ] + + - do: + indices.create: + index: index-with-semantic-field + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + inference_field: + type: semantic_text + inference_id: dense-inference-id + + - do: + indices.create: + index: index-with-dense-field + body: + mappings: + properties: + inference_field: + type: dense_vector + dims: 10 + similarity: cosine + + - do: + index: + index: index-with-semantic-field + id: doc_1 + body: + inference_field: [ "It was a beautiful game", "Very competitive" ] + refresh: true + + - do: + index: + index: index-with-dense-field + id: doc_1 + body: + inference_field: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ] + refresh: true + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: index-with-semantic-field,index-with-dense-field + body: + query: + knn: + field: inference_field + k: 2 + num_candidates: 100 + query_vector_builder: + text_embedding: + model_text: soccer + model_id: dense-inference-id + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "doc_1" } + - match: { hits.hits.1._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 0.9984111, error: 1e15 } } + - not_exists: hits.hits.0.matched_queries + - close_to: { hits.hits.1._score: { value: 0.9984111, error: 1e15 } } + - not_exists: hits.hits.1.matched_queries + + - do: + headers: + # Force JSON content type so that we use a parser that interprets the floating-point score as a double + Content-Type: application/json + search: + index: index-with-semantic-field,index-with-dense-field + body: + query: + knn: + field: inference_field + k: 2 + num_candidates: 100 + query_vector_builder: + text_embedding: + model_text: soccer + model_id: dense-inference-id + boost: 5.0 + _name: i-like-naming-my-queries + + - match: { hits.total.value: 2 } + - match: { 
hits.hits.0._id: "doc_1" } + - match: { hits.hits.1._id: "doc_1" } + - close_to: { hits.hits.0._score: { value: 4.9907494, error: 1e15 } } + - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } + - close_to: { hits.hits.1._score: { value: 4.9907494, error: 1e15 } } + - match: { hits.hits.1.matched_queries: [ "i-like-naming-my-queries" ] } From 6db0abf78d5ae2f73a73cc900aeb519e1201fc4c Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 27 Jun 2025 14:41:44 -0400 Subject: [PATCH 20/49] refactoring match query to adjust boost and queryname and move to copy constructor --- .../SemanticMatchQueryRewriteInterceptor.java | 12 +++++++++--- .../inference/queries/SemanticQueryBuilder.java | 9 ++++----- .../queries/SemanticQueryRewriteInterceptor.java | 2 +- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java index 5721ccd73d5cb..eb3ca7cabdfcf 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java @@ -36,7 +36,7 @@ protected String getQuery(QueryBuilder queryBuilder) { @Override protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) { - return SemanticQueryBuilder.from(queryBuilder, indexInformation.fieldName(), getQuery(queryBuilder), false); + return new SemanticQueryBuilder(queryBuilder, indexInformation.fieldName(), getQuery(queryBuilder), false); } @Override @@ -47,9 +47,15 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( assert (queryBuilder instanceof MatchQueryBuilder); MatchQueryBuilder 
matchQueryBuilder = (MatchQueryBuilder) queryBuilder; BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - QueryBuilder semanticQueryBuilder = createSemanticSubQuery(indexInformation.getInferenceIndices(), matchQueryBuilder); - boolQueryBuilder.should(semanticQueryBuilder); + boolQueryBuilder.should( + createSemanticSubQuery( + indexInformation.getInferenceIndices(), + matchQueryBuilder + ) + ); boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), matchQueryBuilder)); + boolQueryBuilder.boost(matchQueryBuilder.boost()); + boolQueryBuilder.queryName(matchQueryBuilder.queryName()); return boolQueryBuilder; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index f49fc1636e710..5d1dff9887fdc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -109,11 +109,10 @@ public SemanticQueryBuilder(StreamInput in) throws IOException { } } - public static SemanticQueryBuilder from(QueryBuilder queryBuilder, String fieldName, String query, Boolean lenient) { - SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder(fieldName, query, lenient); - semanticQueryBuilder.boost(queryBuilder.boost()); - semanticQueryBuilder.queryName(queryBuilder.queryName()); - return semanticQueryBuilder; + public SemanticQueryBuilder(QueryBuilder queryBuilder, String fieldName, String query, Boolean lenient) { + this(fieldName, query, lenient); + this.boost = queryBuilder.boost(); + this.queryName = queryBuilder.queryName(); } @Override diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index d2936f3542d11..c483eb2c7143b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -116,7 +116,7 @@ protected QueryBuilder createSubQueryForIndices(Collection indices, Quer protected QueryBuilder createSemanticSubQuery(Collection indices, QueryBuilder queryBuilder) { BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - boolQueryBuilder.must(SemanticQueryBuilder.from(queryBuilder, getFieldName(queryBuilder), getQuery(queryBuilder), true)); + boolQueryBuilder.must(new SemanticQueryBuilder(getFieldName(queryBuilder), getQuery(queryBuilder), true)); boolQueryBuilder.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, indices)); return boolQueryBuilder; } From daf2cb4dc1bfd004c213e2d83891ec8a19dc14d9 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 27 Jun 2025 16:50:37 -0400 Subject: [PATCH 21/49] refactoring sparse query to adjust boost and queryname and move to copy constructor --- .../core/ml/search/SparseVectorQueryBuilder.java | 4 ++-- ...SemanticSparseVectorQueryRewriteInterceptor.java | 13 +++++++++---- ...ticSparseVectorQueryRewriteInterceptorTests.java | 5 +++-- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java index 755a06e49e1e8..aa59b72fa4671 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java @@ -170,8 +170,8 @@ public static 
SparseVectorQueryBuilder from( shouldPruneTokens, tokenPruningConfig ); - sparseVectorQueryBuilder.boost(queryBuilder.boost()); - sparseVectorQueryBuilder.queryName(queryBuilder.queryName()); +// sparseVectorQueryBuilder.boost(queryBuilder.boost()); +// sparseVectorQueryBuilder.queryName(queryBuilder.queryName()); return sparseVectorQueryBuilder; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java index 6fc92fbd2d683..a8c53bf537c79 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java @@ -43,14 +43,18 @@ protected String getQuery(QueryBuilder queryBuilder) { @Override protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) { Map> inferenceIdsIndices = indexInformation.getInferenceIdsIndices(); + QueryBuilder finalQueryBuilder; if (inferenceIdsIndices.size() == 1) { // Simple case, everything uses the same inference ID String searchInferenceId = inferenceIdsIndices.keySet().iterator().next(); - return buildNestedQueryFromSparseVectorQuery(queryBuilder, searchInferenceId); + finalQueryBuilder = buildNestedQueryFromSparseVectorQuery(queryBuilder, searchInferenceId); } else { // Multiple inference IDs, construct a boolean query - return buildInferenceQueryWithMultipleInferenceIds(queryBuilder, inferenceIdsIndices); + finalQueryBuilder = buildInferenceQueryWithMultipleInferenceIds(queryBuilder, inferenceIdsIndices); } + finalQueryBuilder.queryName(queryBuilder.queryName()); + finalQueryBuilder.boost(queryBuilder.boost()); + return finalQueryBuilder; } private 
QueryBuilder buildInferenceQueryWithMultipleInferenceIds( @@ -95,6 +99,8 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( ) ); } + boolQueryBuilder.boost(queryBuilder.boost()); + boolQueryBuilder.queryName(queryBuilder.queryName()); return boolQueryBuilder; } @@ -114,8 +120,7 @@ private QueryBuilder buildNestedQueryFromSparseVectorQuery(QueryBuilder queryBui sparseVectorQueryBuilder.shouldPruneTokens(), sparseVectorQueryBuilder.getTokenPruningConfig() ), - ScoreMode.Max, - queryBuilder.queryName() + ScoreMode.Max ); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java index 07c0e7ab2979a..0ff7394b3f868 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java @@ -128,6 +128,8 @@ public void testBoostAndQueryNameOnSparseVectorQueryRewrite() throws IOException rewritten instanceof InterceptedQueryBuilderWrapper ); InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; + assertEquals(BOOST, intercepted.boost(), 1.0f); + assertEquals(QUERY_NAME, intercepted.queryName()); assertTrue(intercepted.queryBuilder instanceof NestedQueryBuilder); NestedQueryBuilder nestedQueryBuilder = (NestedQueryBuilder) intercepted.queryBuilder; assertEquals(SemanticTextField.getChunksFieldName(FIELD_NAME), nestedQueryBuilder.path()); @@ -137,8 +139,7 @@ public void testBoostAndQueryNameOnSparseVectorQueryRewrite() throws IOException assertEquals(SemanticTextField.getEmbeddingsFieldName(FIELD_NAME), sparseVectorQueryBuilder.getFieldName()); assertEquals(INFERENCE_ID, sparseVectorQueryBuilder.getInferenceId()); assertEquals(QUERY, 
sparseVectorQueryBuilder.getQuery()); - assertEquals(BOOST, sparseVectorQueryBuilder.boost(), 0.0f); - assertEquals(QUERY_NAME, sparseVectorQueryBuilder.queryName()); + assertEquals(BOOST, sparseVectorQueryBuilder.boost(), 1.0f); } private QueryRewriteContext createQueryRewriteContext(Map inferenceFields) { From b88b077e5496617e76c219b75b716c80af132f5e Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 27 Jun 2025 21:02:56 +0000 Subject: [PATCH 22/49] [CI] Auto commit changes from spotless --- .../search/vectors/KnnVectorQueryBuilder.java | 6 ++++-- .../xpack/core/ml/search/SparseVectorQueryBuilder.java | 4 ++-- .../queries/SemanticMatchQueryRewriteInterceptor.java | 7 +------ 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java b/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java index fd59506af3bff..9da20712522a6 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java @@ -270,14 +270,16 @@ public KnnVectorQueryBuilder(KnnVectorQueryBuilder queryBuilder) { } public KnnVectorQueryBuilder(KnnVectorQueryBuilder queryBuilder, String fieldName, QueryVectorBuilder queryVectorBuilder) { - this(fieldName, + this( + fieldName, queryBuilder.queryVector(), queryVectorBuilder, null, queryBuilder.k(), queryBuilder.numCands(), queryBuilder.rescoreVectorBuilder(), - queryBuilder.getVectorSimilarity()); + queryBuilder.getVectorSimilarity() + ); this.boost = queryBuilder.boost(); this.queryName = queryBuilder.queryName(); this.filterQueries.addAll(queryBuilder.filterQueries()); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java index aa59b72fa4671..4a4e93923a6f3 
100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java @@ -170,8 +170,8 @@ public static SparseVectorQueryBuilder from( shouldPruneTokens, tokenPruningConfig ); -// sparseVectorQueryBuilder.boost(queryBuilder.boost()); -// sparseVectorQueryBuilder.queryName(queryBuilder.queryName()); + // sparseVectorQueryBuilder.boost(queryBuilder.boost()); + // sparseVectorQueryBuilder.queryName(queryBuilder.queryName()); return sparseVectorQueryBuilder; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java index eb3ca7cabdfcf..d571583d25753 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java @@ -47,12 +47,7 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( assert (queryBuilder instanceof MatchQueryBuilder); MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) queryBuilder; BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - boolQueryBuilder.should( - createSemanticSubQuery( - indexInformation.getInferenceIndices(), - matchQueryBuilder - ) - ); + boolQueryBuilder.should(createSemanticSubQuery(indexInformation.getInferenceIndices(), matchQueryBuilder)); boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), matchQueryBuilder)); boolQueryBuilder.boost(matchQueryBuilder.boost()); boolQueryBuilder.queryName(matchQueryBuilder.queryName()); From 9e725cb039fbf3d22c8e49f3e76ac8c2849e8bef Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 2 
Jul 2025 13:35:36 -0400 Subject: [PATCH 23/49] Refactor sparse vector to adjust boost and queryname in the top level --- .../ml/search/SparseVectorQueryBuilder.java | 22 ------------------- ...icSparseVectorQueryRewriteInterceptor.java | 3 +-- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java index 4a4e93923a6f3..cc67bc28d675e 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java @@ -153,28 +153,6 @@ private SparseVectorQueryBuilder(SparseVectorQueryBuilder other, SetOnce queryVectors, - @Nullable String inferenceId, - @Nullable String query, - @Nullable Boolean shouldPruneTokens, - @Nullable TokenPruningConfig tokenPruningConfig - ) { - SparseVectorQueryBuilder sparseVectorQueryBuilder = new SparseVectorQueryBuilder( - fieldName, - queryVectors, - inferenceId, - query, - shouldPruneTokens, - tokenPruningConfig - ); - // sparseVectorQueryBuilder.boost(queryBuilder.boost()); - // sparseVectorQueryBuilder.queryName(queryBuilder.queryName()); - return sparseVectorQueryBuilder; - } - public String getFieldName() { return fieldName; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java index a8c53bf537c79..3a11eb98aa884 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java @@ -109,8 +109,7 @@ private QueryBuilder buildNestedQueryFromSparseVectorQuery(QueryBuilder queryBui SparseVectorQueryBuilder sparseVectorQueryBuilder = (SparseVectorQueryBuilder) queryBuilder; return QueryBuilders.nestedQuery( SemanticTextField.getChunksFieldName(sparseVectorQueryBuilder.getFieldName()), - SparseVectorQueryBuilder.from( - queryBuilder, + new SparseVectorQueryBuilder( SemanticTextField.getEmbeddingsFieldName(sparseVectorQueryBuilder.getFieldName()), sparseVectorQueryBuilder.getQueryVectors(), (sparseVectorQueryBuilder.getInferenceId() == null && sparseVectorQueryBuilder.getQuery() != null) From 651ee2bebdcf2abc2b9a9ac200a864576bda99e5 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 2 Jul 2025 13:44:17 -0400 Subject: [PATCH 24/49] Refactor knn vector to adjust boost and queryname in the top level --- .../index/query/NestedQueryBuilder.java | 4 ---- .../index/query/QueryBuilders.java | 4 ---- .../search/vectors/KnnVectorQueryBuilder.java | 2 -- ...emanticKnnVectorQueryRewriteInterceptor.java | 17 ++++++++--------- 4 files changed, 8 insertions(+), 19 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java index 657273ed8583f..34c5ede62a656 100644 --- a/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java @@ -84,10 +84,6 @@ private NestedQueryBuilder(String path, QueryBuilder query, ScoreMode scoreMode, this.innerHitBuilder = innerHitBuilder; } - public static NestedQueryBuilder from(String path, QueryBuilder query, ScoreMode scoreMode, String queryName) { - return new NestedQueryBuilder(path, query, scoreMode).queryName(queryName); - } - /** * Read from a stream. 
*/ diff --git a/server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java b/server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java index a0db2f8e23426..66f1c9a74d4c2 100644 --- a/server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java +++ b/server/src/main/java/org/elasticsearch/index/query/QueryBuilders.java @@ -484,10 +484,6 @@ public static NestedQueryBuilder nestedQuery(String path, QueryBuilder query, Sc return new NestedQueryBuilder(path, query, scoreMode); } - public static NestedQueryBuilder nestedQuery(String path, QueryBuilder query, ScoreMode scoreMode, String queryName) { - return NestedQueryBuilder.from(path, query, scoreMode, queryName); - } - /** * A filter for a field based on several terms matching on any of them. * diff --git a/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java b/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java index 9da20712522a6..3fbfd09329212 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java @@ -280,8 +280,6 @@ public KnnVectorQueryBuilder(KnnVectorQueryBuilder queryBuilder, String fieldNam queryBuilder.rescoreVectorBuilder(), queryBuilder.getVectorSimilarity() ); - this.boost = queryBuilder.boost(); - this.queryName = queryBuilder.queryName(); this.filterQueries.addAll(queryBuilder.filterQueries()); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java index e6617abde7404..f2fe3362ab862 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java @@ -52,16 +52,20 @@ protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceI assert (queryBuilder instanceof KnnVectorQueryBuilder); KnnVectorQueryBuilder knnVectorQueryBuilder = (KnnVectorQueryBuilder) queryBuilder; Map> inferenceIdsIndices = indexInformation.getInferenceIdsIndices(); + QueryBuilder finalQueryBuilder; if (inferenceIdsIndices.size() == 1) { // Simple case, everything uses the same inference ID Map.Entry> inferenceIdIndex = inferenceIdsIndices.entrySet().iterator().next(); String searchInferenceId = inferenceIdIndex.getKey(); List indices = inferenceIdIndex.getValue(); - return buildNestedQueryFromKnnVectorQuery(knnVectorQueryBuilder, indices, searchInferenceId); + finalQueryBuilder = buildNestedQueryFromKnnVectorQuery(knnVectorQueryBuilder, indices, searchInferenceId); } else { // Multiple inference IDs, construct a boolean query - return buildInferenceQueryWithMultipleInferenceIds(knnVectorQueryBuilder, inferenceIdsIndices); + finalQueryBuilder = buildInferenceQueryWithMultipleInferenceIds(knnVectorQueryBuilder, inferenceIdsIndices); } + finalQueryBuilder.boost(queryBuilder.boost()); + finalQueryBuilder.queryName(queryBuilder.queryName()); + return finalQueryBuilder; } private QueryBuilder buildInferenceQueryWithMultipleInferenceIds( @@ -118,13 +122,8 @@ private QueryBuilder buildNestedQueryFromKnnVectorQuery( } return QueryBuilders.nestedQuery( SemanticTextField.getChunksFieldName(filteredKnnVectorQueryBuilder.getFieldName()), - buildNewKnnVectorQuery( - SemanticTextField.getEmbeddingsFieldName(filteredKnnVectorQueryBuilder.getFieldName()), - filteredKnnVectorQueryBuilder, - queryVectorBuilder - ), - ScoreMode.Max, - filteredKnnVectorQueryBuilder.queryName() + new KnnVectorQueryBuilder(filteredKnnVectorQueryBuilder, 
SemanticTextField.getEmbeddingsFieldName(filteredKnnVectorQueryBuilder.getFieldName()), queryVectorBuilder), + ScoreMode.Max ); } From 71eac8ddb5f88c367139baa88935e60673632fb4 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 2 Jul 2025 14:43:37 -0400 Subject: [PATCH 25/49] fix knn combined query --- .../queries/SemanticKnnVectorQueryRewriteInterceptor.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java index f2fe3362ab862..d97f9540ec836 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java @@ -106,6 +106,8 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( ) ); } + boolQueryBuilder.boost(queryBuilder.boost()); + boolQueryBuilder.queryName(queryBuilder.queryName()); return boolQueryBuilder; } From d71bf2c97bfc10e7cdc77d1acb839fa67b6fc028 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 2 Jul 2025 14:48:08 -0400 Subject: [PATCH 26/49] fix unit tests --- .../SemanticKnnVectorQueryRewriteInterceptorTests.java | 8 +++++--- .../query/SemanticMatchQueryRewriteInterceptorTests.java | 2 +- .../SemanticSparseVectorQueryRewriteInterceptorTests.java | 7 ++++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java index f970df44ff06d..a8c6f37ae779c 100644 --- 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java @@ -65,7 +65,7 @@ public void testKnnQueryWithVectorBuilderIsInterceptedAndRewritten() throws IOEx } public void testKnnQueryWithVectorBuilderIsInterceptedAndRewrittenWithBoostAndQueryName() throws IOException { - float BOOST = 2.0f; + float BOOST = 5.0f; String QUERY_NAME = "knn_query"; Map inferenceFields = Map.of( @@ -81,10 +81,12 @@ public void testKnnQueryWithVectorBuilderIsInterceptedAndRewrittenWithBoostAndQu testRewrittenInferenceQuery(context, original); QueryBuilder rewritten = original.rewrite(context); InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; + assertEquals(BOOST, intercepted.boost(), 0.0f); + assertEquals(QUERY_NAME, intercepted.queryName()); NestedQueryBuilder nestedQueryBuilder = (NestedQueryBuilder) intercepted.queryBuilder; KnnVectorQueryBuilder knnVectorQueryBuilder = (KnnVectorQueryBuilder) nestedQueryBuilder.query(); - assertEquals(BOOST, knnVectorQueryBuilder.boost(), 0.0f); - assertEquals(QUERY_NAME, knnVectorQueryBuilder.queryName()); + assertEquals(BOOST, knnVectorQueryBuilder.boost(), 5.0f); + assertNull(knnVectorQueryBuilder.queryName()); } public void testKnnWithQueryBuilderWithoutInferenceIdIsInterceptedAndRewritten() throws IOException { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java index c7ea66fe3de83..bc7f1a2b0b8b1 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java @@ -37,7 +37,7 @@ public class SemanticMatchQueryRewriteInterceptorTests extends ESTestCase { private static final String FIELD_NAME = "fieldName"; private static final String VALUE = "value"; private static final String QUERY_NAME = "match_query"; - private static final float BOOST = 2.0f; + private static final float BOOST = 5.0f; @Before public void setup() { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java index 0ff7394b3f868..70867ea441e74 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java @@ -111,7 +111,7 @@ public void testSparseVectorQueryOnNonInferenceFieldRemainsUnchanged() throws IO } public void testBoostAndQueryNameOnSparseVectorQueryRewrite() throws IOException { - float BOOST = 2.0f; + float BOOST = 5.0f; String QUERY_NAME = "sparse_vector_query"; Map inferenceFields = Map.of( @@ -128,7 +128,7 @@ public void testBoostAndQueryNameOnSparseVectorQueryRewrite() throws IOException rewritten instanceof InterceptedQueryBuilderWrapper ); InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; - assertEquals(BOOST, intercepted.boost(), 1.0f); + assertEquals(BOOST, intercepted.boost(), 0.0f); assertEquals(QUERY_NAME, intercepted.queryName()); assertTrue(intercepted.queryBuilder instanceof NestedQueryBuilder); NestedQueryBuilder nestedQueryBuilder = (NestedQueryBuilder) intercepted.queryBuilder; @@ -139,7 +139,8 @@ public void testBoostAndQueryNameOnSparseVectorQueryRewrite() throws IOException 
assertEquals(SemanticTextField.getEmbeddingsFieldName(FIELD_NAME), sparseVectorQueryBuilder.getFieldName()); assertEquals(INFERENCE_ID, sparseVectorQueryBuilder.getInferenceId()); assertEquals(QUERY, sparseVectorQueryBuilder.getQuery()); - assertEquals(BOOST, sparseVectorQueryBuilder.boost(), 1.0f); + assertEquals(BOOST, sparseVectorQueryBuilder.boost(), 5.0f); + assertNull(sparseVectorQueryBuilder.queryName()); } private QueryRewriteContext createQueryRewriteContext(Map inferenceFields) { From 675463ce782a9abdf85bd02b0304be9f443a454c Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 2 Jul 2025 14:49:08 -0400 Subject: [PATCH 27/49] fix lint issues --- .../queries/SemanticKnnVectorQueryRewriteInterceptor.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java index d97f9540ec836..aabe3e00705c9 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java @@ -124,7 +124,11 @@ private QueryBuilder buildNestedQueryFromKnnVectorQuery( } return QueryBuilders.nestedQuery( SemanticTextField.getChunksFieldName(filteredKnnVectorQueryBuilder.getFieldName()), - new KnnVectorQueryBuilder(filteredKnnVectorQueryBuilder, SemanticTextField.getEmbeddingsFieldName(filteredKnnVectorQueryBuilder.getFieldName()), queryVectorBuilder), + new KnnVectorQueryBuilder( + filteredKnnVectorQueryBuilder, + SemanticTextField.getEmbeddingsFieldName(filteredKnnVectorQueryBuilder.getFieldName()), + queryVectorBuilder + ), ScoreMode.Max ); } From 201d27c1c533f687184c5308c43e8cd5064462d0 Mon Sep 17 00:00:00 2001 From: 
Samiul Monir Date: Wed, 2 Jul 2025 14:57:03 -0400 Subject: [PATCH 28/49] remove unused code --- .../queries/SemanticKnnVectorQueryRewriteInterceptor.java | 8 -------- 1 file changed, 8 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java index aabe3e00705c9..a3fad37412de7 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java @@ -148,14 +148,6 @@ private TextEmbeddingQueryVectorBuilder getTextEmbeddingQueryBuilderFromQuery(Kn return (TextEmbeddingQueryVectorBuilder) queryVectorBuilder; } - private KnnVectorQueryBuilder buildNewKnnVectorQuery( - String fieldName, - KnnVectorQueryBuilder original, - QueryVectorBuilder queryVectorBuilder - ) { - return new KnnVectorQueryBuilder(original, fieldName, queryVectorBuilder); - } - @Override public String getQueryName() { return KnnVectorQueryBuilder.NAME; From daf2f6ede80e0ea8a3c69af30bd01247afa3dc20 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 3 Jul 2025 10:50:17 -0400 Subject: [PATCH 29/49] Update inference feature name --- .../elasticsearch/xpack/inference/InferenceFeatures.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index 7c18453be9045..fe31ae71ba8c1 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -37,11 +37,11 @@ public class InferenceFeatures implements FeatureSpecification { private static final NodeFeature TEST_RULE_RETRIEVER_WITH_INDICES_THAT_DONT_RETURN_RANK_DOCS = new NodeFeature( "test_rule_retriever.with_indices_that_dont_return_rank_docs" ); + private static final NodeFeature SEMANTIC_QUERY_REWRITE_INTERCEPTORS_PROPAGATE_BOOST_AND_QUERY_NAME_FIX = new NodeFeature( + "semantic_query_rewrite_interceptors.propagate_boost_and_query_name_fix" + ); private static final NodeFeature SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER = new NodeFeature("semantic_text.match_all_highlighter"); private static final NodeFeature COHERE_V2_API = new NodeFeature("inference.cohere.v2"); - private static final NodeFeature SEMANTIC_TEXT_QUERY_REWRITE_BOOST_AND_QUERY_NAME_FIX = new NodeFeature( - "semantic_text.query_rewrite.boost_and_query_name_fix" - ); @Override public Set getTestFeatures() { @@ -72,7 +72,7 @@ public Set getTestFeatures() { SEMANTIC_TEXT_INDEX_OPTIONS, COHERE_V2_API, SEMANTIC_TEXT_INDEX_OPTIONS_WITH_DEFAULTS, - SEMANTIC_TEXT_QUERY_REWRITE_BOOST_AND_QUERY_NAME_FIX + SEMANTIC_QUERY_REWRITE_INTERCEPTORS_PROPAGATE_BOOST_AND_QUERY_NAME_FIX ); } } From 2521b48135bb5268113d415963406c65460af0e3 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 3 Jul 2025 14:04:37 -0400 Subject: [PATCH 30/49] Remove double boosting issue from match --- .../SemanticMatchQueryRewriteInterceptor.java | 17 +++++++++++++---- .../inference/queries/SemanticQueryBuilder.java | 6 ------ .../SemanticQueryRewriteInterceptor.java | 7 +++++-- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java index d571583d25753..b06cfbbeb7231 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java @@ -36,7 +36,10 @@ protected String getQuery(QueryBuilder queryBuilder) { @Override protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) { - return new SemanticQueryBuilder(queryBuilder, indexInformation.fieldName(), getQuery(queryBuilder), false); + SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder(indexInformation.fieldName(), getQuery(queryBuilder), false); + semanticQueryBuilder.boost(queryBuilder.boost()); + semanticQueryBuilder.queryName(queryBuilder.queryName()); + return semanticQueryBuilder; } @Override @@ -47,10 +50,16 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( assert (queryBuilder instanceof MatchQueryBuilder); MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) queryBuilder; BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - boolQueryBuilder.should(createSemanticSubQuery(indexInformation.getInferenceIndices(), matchQueryBuilder)); + boolQueryBuilder.should( + createSemanticSubQuery( + indexInformation.getInferenceIndices(), + matchQueryBuilder.fieldName(), + (String) matchQueryBuilder.value(), + matchQueryBuilder.boost(), + matchQueryBuilder.queryName() + ) + ); boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), matchQueryBuilder)); - boolQueryBuilder.boost(matchQueryBuilder.boost()); - boolQueryBuilder.queryName(matchQueryBuilder.queryName()); return boolQueryBuilder; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index 5d1dff9887fdc..182c083ef1c26 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -109,12 +109,6 @@ public SemanticQueryBuilder(StreamInput in) throws IOException { } } - public SemanticQueryBuilder(QueryBuilder queryBuilder, String fieldName, String query, Boolean lenient) { - this(fieldName, query, lenient); - this.boost = queryBuilder.boost(); - this.queryName = queryBuilder.queryName(); - } - @Override protected void doWriteTo(StreamOutput out) throws IOException { if (inferenceResultsSupplier != null) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index c483eb2c7143b..152bbe4d799b6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -114,9 +114,12 @@ protected QueryBuilder createSubQueryForIndices(Collection indices, Quer return boolQueryBuilder; } - protected QueryBuilder createSemanticSubQuery(Collection indices, QueryBuilder queryBuilder) { + protected QueryBuilder createSemanticSubQuery(Collection indices, String fieldName, String value, float boost, String queryName) { BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - boolQueryBuilder.must(new SemanticQueryBuilder(getFieldName(queryBuilder), getQuery(queryBuilder), true)); + SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder(fieldName, value, true); + semanticQueryBuilder.boost(boost); + semanticQueryBuilder.queryName(queryName); + boolQueryBuilder.must(semanticQueryBuilder); boolQueryBuilder.filter(new 
TermsQueryBuilder(IndexFieldMapper.NAME, indices)); return boolQueryBuilder; } From 61f9445371dc42b1197d06c5e4af8de687281818 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 3 Jul 2025 14:11:23 -0400 Subject: [PATCH 31/49] Fix double boosting in match test yaml file --- .../test/inference/45_semantic_text_match.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml index 76fa6f24283c0..8f6c318fb4d7b 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml @@ -281,7 +281,7 @@ setup: --- "Apply boost and query name on single index": - requires: - cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" + cluster_features: "semantic_query_rewrite_interceptors.propagate_boost_and_query_name_fix" reason: fix boosting and query name for semantic text match queries. - skip: @@ -347,7 +347,7 @@ setup: --- "Apply boost and query name on multiple indices": - requires: - cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" + cluster_features: "semantic_query_rewrite_interceptors.propagate_boost_and_query_name_fix" reason: fix boosting and query name for semantic text match queries. 
- skip: @@ -386,7 +386,7 @@ setup: - do: index: index: index-with-text-field - id: doc_1 + id: doc_2 body: inference_field: [ "It was a beautiful game", "Very competitive" ] refresh: true @@ -405,10 +405,10 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - - match: { hits.hits.1._id: "doc_1" } + - match: { hits.hits.1._id: "doc_2" } - close_to: { hits.hits.0._score: { value: 1.1140361E19, error: 1e15 } } - not_exists: hits.hits.0.matched_queries - - close_to: { hits.hits.1._score: { value: 0.2876821, error: 1e15 } } + - close_to: { hits.hits.1._score: { value: 0.2876821, error: 1e-7 } } - not_exists: hits.hits.1.matched_queries - do: @@ -427,8 +427,8 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - - match: { hits.hits.1._id: "doc_1" } + - match: { hits.hits.1._id: "doc_2" } - close_to: { hits.hits.0._score: { value: 5.5701804E19, error: 1e15 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } - - close_to: { hits.hits.1._score: { value: 1.4384103, error: 1e15 } } + - close_to: { hits.hits.1._score: { value: 1.4384103, error: 1e-7 } } - match: { hits.hits.1.matched_queries: [ "i-like-naming-my-queries" ] } From f4cadaac8007258529b0f55ad6c11f1f56b53f76 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 3 Jul 2025 14:26:53 -0400 Subject: [PATCH 32/49] move to bool level for match semantic boost --- .../inference/queries/SemanticQueryRewriteInterceptor.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index 152bbe4d799b6..5348651d3a5b0 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -116,10 +116,9 @@ protected QueryBuilder createSubQueryForIndices(Collection indices, Quer protected QueryBuilder createSemanticSubQuery(Collection indices, String fieldName, String value, float boost, String queryName) { BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - SemanticQueryBuilder semanticQueryBuilder = new SemanticQueryBuilder(fieldName, value, true); - semanticQueryBuilder.boost(boost); - semanticQueryBuilder.queryName(queryName); - boolQueryBuilder.must(semanticQueryBuilder); + boolQueryBuilder.must(new SemanticQueryBuilder(fieldName, value, true)); + boolQueryBuilder.boost(boost); + boolQueryBuilder.queryName(queryName); boolQueryBuilder.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, indices)); return boolQueryBuilder; } From 08909de29330a9a75bfb93ae5e991edcb65bb840 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 3 Jul 2025 14:38:35 -0400 Subject: [PATCH 33/49] fix double boosting for sparse vector --- .../queries/SemanticSparseVectorQueryRewriteInterceptor.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java index 3a11eb98aa884..54b0a802d0fd2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java @@ -99,8 +99,6 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( ) ); } - boolQueryBuilder.boost(queryBuilder.boost()); - boolQueryBuilder.queryName(queryBuilder.queryName()); return boolQueryBuilder; 
} @@ -120,7 +118,7 @@ private QueryBuilder buildNestedQueryFromSparseVectorQuery(QueryBuilder queryBui sparseVectorQueryBuilder.getTokenPruningConfig() ), ScoreMode.Max - ); + ).queryName(queryBuilder.queryName()).boost(queryBuilder.boost()); } @Override From 37bfc4304cb761009f78f64e21b0b68fb35b00e7 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 3 Jul 2025 14:39:28 -0400 Subject: [PATCH 34/49] fix double boosting for sparse vector in yaml test --- .../inference/46_semantic_text_sparse_vector.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml index b3f653f070dff..3e99af024e4f7 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml @@ -250,7 +250,7 @@ setup: --- "Apply boost and query name on single index": - requires: - cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" + cluster_features: "semantic_query_rewrite_interceptors.propagate_boost_and_query_name_fix" reason: fix boosting and query name for semantic text sparse vector queries. - skip: @@ -316,7 +316,7 @@ setup: --- "Apply boost and query name on multiple indices": - requires: - cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" + cluster_features: "semantic_query_rewrite_interceptors.propagate_boost_and_query_name_fix" reason: fix boosting and query name for semantic text sparse vector queries. 
- skip: @@ -355,7 +355,7 @@ setup: - do: index: index: index-with-sparse-field - id: doc_1 + id: doc_2 body: inference_field: { "feature_0": 1, "feature_1": 2, "feature_2": 3, "feature_3": 4, "feature_4": 5 } refresh: true @@ -375,10 +375,10 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - - match: { hits.hits.1._id: "doc_1" } + - match: { hits.hits.1._id: "doc_2" } - close_to: { hits.hits.0._score: { value: 5.700229E18, error: 1e15 } } - not_exists: hits.hits.0.matched_queries - - close_to: { hits.hits.1._score: { value: 1.3455845E10, error: 1e15 } } + - close_to: { hits.hits.1._score: { value: 1.3455845E10, error: 1e-7 } } - not_exists: hits.hits.1.matched_queries - do: @@ -398,8 +398,8 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - - match: { hits.hits.1._id: "doc_1" } + - match: { hits.hits.1._id: "doc_2" } - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e15 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } - - close_to: { hits.hits.1._score: { value: 6.7279225E10, error: 1e15 } } + - close_to: { hits.hits.1._score: { value: 6.7279225E10, error: 1e-7 } } - match: { hits.hits.1.matched_queries: [ "i-like-naming-my-queries" ] } From fa5cfe776342512eb57f93f2fbb2fca735b05f0b Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 3 Jul 2025 16:23:49 -0400 Subject: [PATCH 35/49] fix knn combined query --- .../search/vectors/KnnVectorQueryBuilder.java | 18 ----- ...anticKnnVectorQueryRewriteInterceptor.java | 67 +++++++++++++++---- 2 files changed, 55 insertions(+), 30 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java b/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java index 92cb5ea639611..ea0c15642eb74 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java +++ 
b/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java @@ -265,24 +265,6 @@ public KnnVectorQueryBuilder(StreamInput in) throws IOException { this.queryVectorSupplier = null; } - public KnnVectorQueryBuilder(KnnVectorQueryBuilder queryBuilder) { - this(queryBuilder, queryBuilder.getFieldName(), queryBuilder.queryVectorBuilder()); - } - - public KnnVectorQueryBuilder(KnnVectorQueryBuilder queryBuilder, String fieldName, QueryVectorBuilder queryVectorBuilder) { - this( - fieldName, - queryBuilder.queryVector(), - queryVectorBuilder, - null, - queryBuilder.k(), - queryBuilder.numCands(), - queryBuilder.rescoreVectorBuilder(), - queryBuilder.getVectorSimilarity() - ); - this.filterQueries.addAll(queryBuilder.filterQueries()); - } - public String getFieldName() { return fieldName; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java index a3fad37412de7..5a48c1cf9ff01 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java @@ -52,20 +52,16 @@ protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceI assert (queryBuilder instanceof KnnVectorQueryBuilder); KnnVectorQueryBuilder knnVectorQueryBuilder = (KnnVectorQueryBuilder) queryBuilder; Map> inferenceIdsIndices = indexInformation.getInferenceIdsIndices(); - QueryBuilder finalQueryBuilder; if (inferenceIdsIndices.size() == 1) { // Simple case, everything uses the same inference ID Map.Entry> inferenceIdIndex = inferenceIdsIndices.entrySet().iterator().next(); String searchInferenceId = inferenceIdIndex.getKey(); List indices = 
inferenceIdIndex.getValue(); - finalQueryBuilder = buildNestedQueryFromKnnVectorQuery(knnVectorQueryBuilder, indices, searchInferenceId); + return buildNestedQueryFromKnnVectorQuery(knnVectorQueryBuilder, indices, searchInferenceId); } else { // Multiple inference IDs, construct a boolean query - finalQueryBuilder = buildInferenceQueryWithMultipleInferenceIds(knnVectorQueryBuilder, inferenceIdsIndices); + return buildInferenceQueryWithMultipleInferenceIds(knnVectorQueryBuilder, inferenceIdsIndices); } - finalQueryBuilder.boost(queryBuilder.boost()); - finalQueryBuilder.queryName(queryBuilder.queryName()); - return finalQueryBuilder; } private QueryBuilder buildInferenceQueryWithMultipleInferenceIds( @@ -106,8 +102,6 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( ) ); } - boolQueryBuilder.boost(queryBuilder.boost()); - boolQueryBuilder.queryName(queryBuilder.queryName()); return boolQueryBuilder; } @@ -124,17 +118,37 @@ private QueryBuilder buildNestedQueryFromKnnVectorQuery( } return QueryBuilders.nestedQuery( SemanticTextField.getChunksFieldName(filteredKnnVectorQueryBuilder.getFieldName()), - new KnnVectorQueryBuilder( - filteredKnnVectorQueryBuilder, + buildNewKnnVectorQuery( SemanticTextField.getEmbeddingsFieldName(filteredKnnVectorQueryBuilder.getFieldName()), + filteredKnnVectorQueryBuilder, queryVectorBuilder ), ScoreMode.Max - ); + ).queryName(knnVectorQueryBuilder.queryName()).boost(knnVectorQueryBuilder.boost()); } private KnnVectorQueryBuilder addIndexFilterToKnnVectorQuery(Collection indices, KnnVectorQueryBuilder original) { - KnnVectorQueryBuilder copy = new KnnVectorQueryBuilder(original); + KnnVectorQueryBuilder copy; + if (original.queryVectorBuilder() != null) { + copy = new KnnVectorQueryBuilder( + original.getFieldName(), + original.queryVectorBuilder(), + original.k(), + original.numCands(), + original.getVectorSimilarity() + ); + } else { + copy = new KnnVectorQueryBuilder( + original.getFieldName(), + 
original.queryVector(), + original.k(), + original.numCands(), + original.rescoreVectorBuilder(), + original.getVectorSimilarity() + ); + } + + copy.addFilterQueries(original.filterQueries()); copy.addFilterQuery(new TermsQueryBuilder(IndexFieldMapper.NAME, indices)); return copy; } @@ -148,6 +162,35 @@ private TextEmbeddingQueryVectorBuilder getTextEmbeddingQueryBuilderFromQuery(Kn return (TextEmbeddingQueryVectorBuilder) queryVectorBuilder; } + private KnnVectorQueryBuilder buildNewKnnVectorQuery( + String fieldName, + KnnVectorQueryBuilder original, + QueryVectorBuilder queryVectorBuilder + ) { + KnnVectorQueryBuilder newQueryBuilder; + if (original.queryVectorBuilder() != null) { + newQueryBuilder = new KnnVectorQueryBuilder( + fieldName, + queryVectorBuilder, + original.k(), + original.numCands(), + original.getVectorSimilarity() + ); + } else { + newQueryBuilder = new KnnVectorQueryBuilder( + fieldName, + original.queryVector(), + original.k(), + original.numCands(), + original.rescoreVectorBuilder(), + original.getVectorSimilarity() + ); + } + + newQueryBuilder.addFilterQueries(original.filterQueries()); + return newQueryBuilder; + } + @Override public String getQueryName() { return KnnVectorQueryBuilder.NAME; From 06406316f8c4f64ef26c8dda47e79a3c99d3ef91 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 3 Jul 2025 16:50:14 -0400 Subject: [PATCH 36/49] fix knn combined query --- .../SemanticKnnVectorQueryRewriteInterceptor.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java index 5a48c1cf9ff01..b1f5c240371f8 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticKnnVectorQueryRewriteInterceptor.java @@ -52,16 +52,20 @@ protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceI assert (queryBuilder instanceof KnnVectorQueryBuilder); KnnVectorQueryBuilder knnVectorQueryBuilder = (KnnVectorQueryBuilder) queryBuilder; Map> inferenceIdsIndices = indexInformation.getInferenceIdsIndices(); + QueryBuilder finalQueryBuilder; if (inferenceIdsIndices.size() == 1) { // Simple case, everything uses the same inference ID Map.Entry> inferenceIdIndex = inferenceIdsIndices.entrySet().iterator().next(); String searchInferenceId = inferenceIdIndex.getKey(); List indices = inferenceIdIndex.getValue(); - return buildNestedQueryFromKnnVectorQuery(knnVectorQueryBuilder, indices, searchInferenceId); + finalQueryBuilder = buildNestedQueryFromKnnVectorQuery(knnVectorQueryBuilder, indices, searchInferenceId); } else { // Multiple inference IDs, construct a boolean query - return buildInferenceQueryWithMultipleInferenceIds(knnVectorQueryBuilder, inferenceIdsIndices); + finalQueryBuilder = buildInferenceQueryWithMultipleInferenceIds(knnVectorQueryBuilder, inferenceIdsIndices); } + finalQueryBuilder.boost(queryBuilder.boost()); + finalQueryBuilder.queryName(queryBuilder.queryName()); + return finalQueryBuilder; } private QueryBuilder buildInferenceQueryWithMultipleInferenceIds( @@ -102,6 +106,8 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( ) ); } + boolQueryBuilder.boost(queryBuilder.boost()); + boolQueryBuilder.queryName(queryBuilder.queryName()); return boolQueryBuilder; } @@ -124,7 +130,7 @@ private QueryBuilder buildNestedQueryFromKnnVectorQuery( queryVectorBuilder ), ScoreMode.Max - ).queryName(knnVectorQueryBuilder.queryName()).boost(knnVectorQueryBuilder.boost()); + ); } private KnnVectorQueryBuilder addIndexFilterToKnnVectorQuery(Collection indices, KnnVectorQueryBuilder original) { From 
404efcf0ae9ba70ae79842921cd5f3308003949f Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 3 Jul 2025 17:01:53 -0400 Subject: [PATCH 37/49] fix sparse combined query --- ...SemanticSparseVectorQueryRewriteInterceptor.java | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java index 54b0a802d0fd2..ef8dcba371958 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java @@ -86,7 +86,14 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( boolQueryBuilder.should( createSubQueryForIndices( indexInformation.nonInferenceIndices(), - createSubQueryForIndices(indexInformation.nonInferenceIndices(), sparseVectorQueryBuilder) + createSubQueryForIndices(indexInformation.nonInferenceIndices(), new SparseVectorQueryBuilder( + sparseVectorQueryBuilder.getFieldName(), + sparseVectorQueryBuilder.getQueryVectors(), + sparseVectorQueryBuilder.getInferenceId(), + sparseVectorQueryBuilder.getQuery(), + sparseVectorQueryBuilder.shouldPruneTokens(), + sparseVectorQueryBuilder.getTokenPruningConfig() + )) ) ); // We always perform nested subqueries on semantic_text fields, to support @@ -99,6 +106,8 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( ) ); } + boolQueryBuilder.boost(queryBuilder.boost()); + boolQueryBuilder.queryName(queryBuilder.queryName()); return boolQueryBuilder; } @@ -118,7 +127,7 @@ private QueryBuilder buildNestedQueryFromSparseVectorQuery(QueryBuilder queryBui sparseVectorQueryBuilder.getTokenPruningConfig() ), ScoreMode.Max - 
).queryName(queryBuilder.queryName()).boost(queryBuilder.boost()); + ); } @Override From f73285d6be7533452e2e15f488838001cd782930 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Thu, 3 Jul 2025 17:02:38 -0400 Subject: [PATCH 38/49] fix knn yaml test for combined query --- .../test/inference/47_semantic_text_knn.yml | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml index 834471229bb88..19b2411209ad9 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml @@ -407,7 +407,7 @@ setup: --- "Apply boost and query name on single index": - requires: - cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" + cluster_features: "semantic_query_rewrite_interceptors.propagate_boost_and_query_name_fix" reason: fix boosting and query name for semantic text knn queries. - skip: @@ -475,13 +475,13 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 4.9907494, error: 1e15 } } + - close_to: { hits.hits.0._score: { value: 4.9907494, error: 1e-7 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } --- "Apply boost and query name on multiple indices": - requires: - cluster_features: "semantic_text.query_rewrite.boost_and_query_name_fix" + cluster_features: "semantic_query_rewrite_interceptors.propagate_boost_and_query_name_fix" reason: fix boosting and query name for semantic text knn queries. 
- skip: @@ -522,7 +522,7 @@ setup: - do: index: index: index-with-dense-field - id: doc_1 + id: doc_2 body: inference_field: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ] refresh: true @@ -546,10 +546,10 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - - match: { hits.hits.1._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 0.9984111, error: 1e15 } } + - match: { hits.hits.1._id: "doc_2" } + - close_to: { hits.hits.0._score: { value: 0.9981499, error: 1e-7 } } - not_exists: hits.hits.0.matched_queries - - close_to: { hits.hits.1._score: { value: 0.9984111, error: 1e15 } } + - close_to: { hits.hits.1._score: { value: 0.9428395, error: 1e-7 } } - not_exists: hits.hits.1.matched_queries - do: @@ -573,8 +573,8 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - - match: { hits.hits.1._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 4.9907494, error: 1e15 } } + - match: { hits.hits.1._id: "doc_2" } + - close_to: { hits.hits.0._score: { value: 4.9907494, error: 1e-7 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } - - close_to: { hits.hits.1._score: { value: 4.9907494, error: 1e15 } } + - close_to: { hits.hits.1._score: { value: 4.7141976, error: 1e-7 } } - match: { hits.hits.1.matched_queries: [ "i-like-naming-my-queries" ] } From 96f5aa62b90f9c6b9ec77a44dd3bc4da03a9e0bc Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 4 Jul 2025 11:27:16 -0400 Subject: [PATCH 39/49] refactoring unit tests --- ...KnnVectorQueryRewriteInterceptorTests.java | 38 ++++-------- ...nticMatchQueryRewriteInterceptorTests.java | 6 +- ...rseVectorQueryRewriteInterceptorTests.java | 59 +++++-------------- 3 files changed, 30 insertions(+), 73 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java index a8c6f37ae779c..83404a2ca45f9 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java @@ -54,6 +54,8 @@ public void cleanup() { } public void testKnnQueryWithVectorBuilderIsInterceptedAndRewritten() throws IOException { + float boost = randomFloat() * 5; + String queryName = randomAlphaOfLength(5); Map inferenceFields = Map.of( FIELD_NAME, new InferenceFieldMetadata(index.getName(), INFERENCE_ID, new String[] { FIELD_NAME }, null) @@ -61,34 +63,11 @@ public void testKnnQueryWithVectorBuilderIsInterceptedAndRewritten() throws IOEx QueryRewriteContext context = createQueryRewriteContext(inferenceFields); QueryVectorBuilder queryVectorBuilder = new TextEmbeddingQueryVectorBuilder(INFERENCE_ID, QUERY); KnnVectorQueryBuilder original = new KnnVectorQueryBuilder(FIELD_NAME, queryVectorBuilder, 10, 100, null); + original.boost(boost); + original.queryName(queryName); testRewrittenInferenceQuery(context, original); } - public void testKnnQueryWithVectorBuilderIsInterceptedAndRewrittenWithBoostAndQueryName() throws IOException { - float BOOST = 5.0f; - String QUERY_NAME = "knn_query"; - - Map inferenceFields = Map.of( - FIELD_NAME, - new InferenceFieldMetadata(index.getName(), INFERENCE_ID, new String[] { FIELD_NAME }, null) - ); - QueryRewriteContext context = createQueryRewriteContext(inferenceFields); - QueryVectorBuilder queryVectorBuilder = new TextEmbeddingQueryVectorBuilder(INFERENCE_ID, QUERY); - KnnVectorQueryBuilder original = new KnnVectorQueryBuilder(FIELD_NAME, queryVectorBuilder, 10, 100, null); - original.boost(BOOST); - original.queryName(QUERY_NAME); - - testRewrittenInferenceQuery(context, original); - QueryBuilder rewritten = 
original.rewrite(context); - InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; - assertEquals(BOOST, intercepted.boost(), 0.0f); - assertEquals(QUERY_NAME, intercepted.queryName()); - NestedQueryBuilder nestedQueryBuilder = (NestedQueryBuilder) intercepted.queryBuilder; - KnnVectorQueryBuilder knnVectorQueryBuilder = (KnnVectorQueryBuilder) nestedQueryBuilder.query(); - assertEquals(BOOST, knnVectorQueryBuilder.boost(), 5.0f); - assertNull(knnVectorQueryBuilder.queryName()); - } - public void testKnnWithQueryBuilderWithoutInferenceIdIsInterceptedAndRewritten() throws IOException { Map inferenceFields = Map.of( FIELD_NAME, @@ -107,14 +86,23 @@ private void testRewrittenInferenceQuery(QueryRewriteContext context, KnnVectorQ rewritten instanceof InterceptedQueryBuilderWrapper ); InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; + assertEquals(original.boost(), intercepted.boost(), 0.0f); + assertEquals(original.queryName(), intercepted.queryName()); assertTrue(intercepted.queryBuilder instanceof NestedQueryBuilder); + NestedQueryBuilder nestedQueryBuilder = (NestedQueryBuilder) intercepted.queryBuilder; + assertEquals(original.boost(), nestedQueryBuilder.boost(), 0.0f); + assertEquals(original.queryName(), nestedQueryBuilder.queryName()); assertEquals(SemanticTextField.getChunksFieldName(FIELD_NAME), nestedQueryBuilder.path()); + QueryBuilder innerQuery = nestedQueryBuilder.query(); assertTrue(innerQuery instanceof KnnVectorQueryBuilder); KnnVectorQueryBuilder knnVectorQueryBuilder = (KnnVectorQueryBuilder) innerQuery; + assertEquals(1.0f, knnVectorQueryBuilder.boost(), 0.0f); + assertNull(knnVectorQueryBuilder.queryName()); assertEquals(SemanticTextField.getEmbeddingsFieldName(FIELD_NAME), knnVectorQueryBuilder.getFieldName()); assertTrue(knnVectorQueryBuilder.queryVectorBuilder() instanceof TextEmbeddingQueryVectorBuilder); + TextEmbeddingQueryVectorBuilder textEmbeddingQueryVectorBuilder 
= (TextEmbeddingQueryVectorBuilder) knnVectorQueryBuilder .queryVectorBuilder(); assertEquals(QUERY, textEmbeddingQueryVectorBuilder.getModelText()); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java index bc7f1a2b0b8b1..54ea2cf1df98d 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java @@ -81,13 +81,13 @@ public void testMatchQueryOnNonInferenceFieldRemainsMatchQuery() throws IOExcept assertEquals(original, rewritten); } - public void testBoostInMatchQueryRewrite() throws IOException { + public void testBoostAndQueryNameInMatchQueryRewrite() throws IOException { Map inferenceFields = Map.of( FIELD_NAME, new InferenceFieldMetadata(index.getName(), "inferenceId", new String[] { FIELD_NAME }, null) ); QueryRewriteContext context = createQueryRewriteContext(inferenceFields); - QueryBuilder original = createTestQueryBuilderWithBoost(); + QueryBuilder original = createTestQueryBuilderWithBoostAndQueryName(); QueryBuilder rewritten = original.rewrite(context); assertTrue( "Expected query to be intercepted, but was [" + rewritten.getClass().getName() + "]", @@ -106,7 +106,7 @@ private MatchQueryBuilder createTestQueryBuilder() { return new MatchQueryBuilder(FIELD_NAME, VALUE); } - private MatchQueryBuilder createTestQueryBuilderWithBoost() { + private MatchQueryBuilder createTestQueryBuilderWithBoostAndQueryName() { MatchQueryBuilder queryBuilder = createTestQueryBuilder(); queryBuilder.boost(BOOST); queryBuilder.queryName(QUERY_NAME); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java index 70867ea441e74..533df90ee7d0d 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java @@ -52,27 +52,17 @@ public void cleanup() { } public void testSparseVectorQueryOnInferenceFieldIsInterceptedAndRewritten() throws IOException { + float boost = randomFloat() * 5; + String queryName = randomAlphaOfLength(5); Map inferenceFields = Map.of( FIELD_NAME, new InferenceFieldMetadata(index.getName(), "inferenceId", new String[] { FIELD_NAME }, null) ); QueryRewriteContext context = createQueryRewriteContext(inferenceFields); QueryBuilder original = new SparseVectorQueryBuilder(FIELD_NAME, INFERENCE_ID, QUERY); - QueryBuilder rewritten = original.rewrite(context); - assertTrue( - "Expected query to be intercepted, but was [" + rewritten.getClass().getName() + "]", - rewritten instanceof InterceptedQueryBuilderWrapper - ); - InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; - assertTrue(intercepted.queryBuilder instanceof NestedQueryBuilder); - NestedQueryBuilder nestedQueryBuilder = (NestedQueryBuilder) intercepted.queryBuilder; - assertEquals(SemanticTextField.getChunksFieldName(FIELD_NAME), nestedQueryBuilder.path()); - QueryBuilder innerQuery = nestedQueryBuilder.query(); - assertTrue(innerQuery instanceof SparseVectorQueryBuilder); - SparseVectorQueryBuilder sparseVectorQueryBuilder = (SparseVectorQueryBuilder) innerQuery; - assertEquals(SemanticTextField.getEmbeddingsFieldName(FIELD_NAME), sparseVectorQueryBuilder.getFieldName()); - assertEquals(INFERENCE_ID, sparseVectorQueryBuilder.getInferenceId()); - assertEquals(QUERY, sparseVectorQueryBuilder.getQuery()); + original.boost(boost); + 
original.queryName(queryName); + testRewrittenInferenceQuery(context, original); } public void testSparseVectorQueryOnInferenceFieldWithoutInferenceIdIsInterceptedAndRewritten() throws IOException { @@ -82,21 +72,7 @@ public void testSparseVectorQueryOnInferenceFieldWithoutInferenceIdIsIntercepted ); QueryRewriteContext context = createQueryRewriteContext(inferenceFields); QueryBuilder original = new SparseVectorQueryBuilder(FIELD_NAME, null, QUERY); - QueryBuilder rewritten = original.rewrite(context); - assertTrue( - "Expected query to be intercepted, but was [" + rewritten.getClass().getName() + "]", - rewritten instanceof InterceptedQueryBuilderWrapper - ); - InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; - assertTrue(intercepted.queryBuilder instanceof NestedQueryBuilder); - NestedQueryBuilder nestedQueryBuilder = (NestedQueryBuilder) intercepted.queryBuilder; - assertEquals(SemanticTextField.getChunksFieldName(FIELD_NAME), nestedQueryBuilder.path()); - QueryBuilder innerQuery = nestedQueryBuilder.query(); - assertTrue(innerQuery instanceof SparseVectorQueryBuilder); - SparseVectorQueryBuilder sparseVectorQueryBuilder = (SparseVectorQueryBuilder) innerQuery; - assertEquals(SemanticTextField.getEmbeddingsFieldName(FIELD_NAME), sparseVectorQueryBuilder.getFieldName()); - assertEquals(INFERENCE_ID, sparseVectorQueryBuilder.getInferenceId()); - assertEquals(QUERY, sparseVectorQueryBuilder.getQuery()); + testRewrittenInferenceQuery(context, original); } public void testSparseVectorQueryOnNonInferenceFieldRemainsUnchanged() throws IOException { @@ -110,36 +86,29 @@ public void testSparseVectorQueryOnNonInferenceFieldRemainsUnchanged() throws IO assertEquals(original, rewritten); } - public void testBoostAndQueryNameOnSparseVectorQueryRewrite() throws IOException { - float BOOST = 5.0f; - String QUERY_NAME = "sparse_vector_query"; - - Map inferenceFields = Map.of( - FIELD_NAME, - new InferenceFieldMetadata(index.getName(), 
"inferenceId", new String[] { FIELD_NAME }, null) - ); - QueryRewriteContext context = createQueryRewriteContext(inferenceFields); - QueryBuilder original = new SparseVectorQueryBuilder(FIELD_NAME, INFERENCE_ID, QUERY); - original.boost(BOOST); - original.queryName(QUERY_NAME); + private void testRewrittenInferenceQuery(QueryRewriteContext context, QueryBuilder original) throws IOException { QueryBuilder rewritten = original.rewrite(context); assertTrue( "Expected query to be intercepted, but was [" + rewritten.getClass().getName() + "]", rewritten instanceof InterceptedQueryBuilderWrapper ); InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; - assertEquals(BOOST, intercepted.boost(), 0.0f); - assertEquals(QUERY_NAME, intercepted.queryName()); + assertEquals(original.boost(), intercepted.boost(), 0.0f); + assertEquals(original.queryName(), intercepted.queryName()); + assertTrue(intercepted.queryBuilder instanceof NestedQueryBuilder); NestedQueryBuilder nestedQueryBuilder = (NestedQueryBuilder) intercepted.queryBuilder; assertEquals(SemanticTextField.getChunksFieldName(FIELD_NAME), nestedQueryBuilder.path()); + assertEquals(original.boost(), nestedQueryBuilder.boost(), 0.0f); + assertEquals(original.queryName(), nestedQueryBuilder.queryName()); + QueryBuilder innerQuery = nestedQueryBuilder.query(); assertTrue(innerQuery instanceof SparseVectorQueryBuilder); SparseVectorQueryBuilder sparseVectorQueryBuilder = (SparseVectorQueryBuilder) innerQuery; assertEquals(SemanticTextField.getEmbeddingsFieldName(FIELD_NAME), sparseVectorQueryBuilder.getFieldName()); assertEquals(INFERENCE_ID, sparseVectorQueryBuilder.getInferenceId()); assertEquals(QUERY, sparseVectorQueryBuilder.getQuery()); - assertEquals(BOOST, sparseVectorQueryBuilder.boost(), 5.0f); + assertEquals(1.0f, sparseVectorQueryBuilder.boost(), 0.0f); assertNull(sparseVectorQueryBuilder.queryName()); } From 3065e5b805d2c3ef00d349c6700b6e51bf71613a Mon Sep 17 00:00:00 2001 
From: Samiul Monir Date: Fri, 4 Jul 2025 11:28:09 -0400 Subject: [PATCH 40/49] linting --- .../SemanticQueryRewriteInterceptor.java | 8 +++++++- ...icSparseVectorQueryRewriteInterceptor.java | 19 +++++++++++-------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index 5348651d3a5b0..c504ab0ac727d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -114,7 +114,13 @@ protected QueryBuilder createSubQueryForIndices(Collection indices, Quer return boolQueryBuilder; } - protected QueryBuilder createSemanticSubQuery(Collection indices, String fieldName, String value, float boost, String queryName) { + protected QueryBuilder createSemanticSubQuery( + Collection indices, + String fieldName, + String value, + float boost, + String queryName + ) { BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); boolQueryBuilder.must(new SemanticQueryBuilder(fieldName, value, true)); boolQueryBuilder.boost(boost); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java index ef8dcba371958..6aee25bb8eabf 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java @@ -86,14 +86,17 @@ protected QueryBuilder 
buildCombinedInferenceAndNonInferenceQuery( boolQueryBuilder.should( createSubQueryForIndices( indexInformation.nonInferenceIndices(), - createSubQueryForIndices(indexInformation.nonInferenceIndices(), new SparseVectorQueryBuilder( - sparseVectorQueryBuilder.getFieldName(), - sparseVectorQueryBuilder.getQueryVectors(), - sparseVectorQueryBuilder.getInferenceId(), - sparseVectorQueryBuilder.getQuery(), - sparseVectorQueryBuilder.shouldPruneTokens(), - sparseVectorQueryBuilder.getTokenPruningConfig() - )) + createSubQueryForIndices( + indexInformation.nonInferenceIndices(), + new SparseVectorQueryBuilder( + sparseVectorQueryBuilder.getFieldName(), + sparseVectorQueryBuilder.getQueryVectors(), + sparseVectorQueryBuilder.getInferenceId(), + sparseVectorQueryBuilder.getQuery(), + sparseVectorQueryBuilder.shouldPruneTokens(), + sparseVectorQueryBuilder.getTokenPruningConfig() + ) + ) ) ); // We always perform nested subqueries on semantic_text fields, to support From 828d8c28f815a723eeb93e37e7d43a6f861893ad Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 4 Jul 2025 11:49:45 -0400 Subject: [PATCH 41/49] fix match query unit test --- .../query/SemanticMatchQueryRewriteInterceptorTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java index 54ea2cf1df98d..01f300000d211 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java @@ -94,12 +94,12 @@ public void testBoostAndQueryNameInMatchQueryRewrite() throws IOException { rewritten instanceof InterceptedQueryBuilderWrapper ); InterceptedQueryBuilderWrapper intercepted = 
(InterceptedQueryBuilderWrapper) rewritten; + assertEquals(5.0, intercepted.boost(), 0.0f); + assertEquals(QUERY_NAME, intercepted.queryName()); assertTrue(intercepted.queryBuilder instanceof SemanticQueryBuilder); SemanticQueryBuilder semanticQueryBuilder = (SemanticQueryBuilder) intercepted.queryBuilder; assertEquals(FIELD_NAME, semanticQueryBuilder.getFieldName()); assertEquals(VALUE, semanticQueryBuilder.getQuery()); - assertEquals(BOOST, semanticQueryBuilder.boost(), 0.0f); - assertEquals(QUERY_NAME, semanticQueryBuilder.queryName()); } private MatchQueryBuilder createTestQueryBuilder() { From d08dbdda598a10e04ef8a7d04eeb5c0514c29509 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Tue, 8 Jul 2025 09:06:22 -0400 Subject: [PATCH 42/49] adding copy constructor for match query --- .../index/query/MatchQueryBuilder.java | 19 +++++++++++++++++++ .../SemanticMatchQueryRewriteInterceptor.java | 10 ++++++---- .../SemanticQueryRewriteInterceptor.java | 10 +--------- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java index fd704d39ca384..a9d8efbd11bb9 100644 --- a/server/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java @@ -120,6 +120,25 @@ public MatchQueryBuilder(StreamInput in) throws IOException { autoGenerateSynonymsPhraseQuery = in.readBoolean(); } + /** + * Copy constructor from a QueryBuilder. 
+ */ + public MatchQueryBuilder(MatchQueryBuilder queryBuilder) { + this(queryBuilder.fieldName, queryBuilder.value); + + this.operator = queryBuilder.operator(); + this.prefixLength = queryBuilder.prefixLength(); + this.maxExpansions = queryBuilder.maxExpansions(); + this.fuzzyTranspositions = queryBuilder.fuzzyTranspositions(); + this.lenient = queryBuilder.lenient(); + this.zeroTermsQuery = queryBuilder.zeroTermsQuery(); + this.analyzer = queryBuilder.analyzer(); + this.minimumShouldMatch = queryBuilder.minimumShouldMatch(); + this.fuzzyRewrite = queryBuilder.fuzzyRewrite(); + this.fuzziness = queryBuilder.fuzziness(); + this.autoGenerateSynonymsPhraseQuery = queryBuilder.autoGenerateSynonymsPhraseQuery(); + } + @Override protected void doWriteTo(StreamOutput out) throws IOException { out.writeString(fieldName); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java index b06cfbbeb7231..ea25cb8ee8143 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java @@ -48,18 +48,20 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( InferenceIndexInformationForField indexInformation ) { assert (queryBuilder instanceof MatchQueryBuilder); - MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) queryBuilder; + MatchQueryBuilder originalMatchQueryBuilder = (MatchQueryBuilder) queryBuilder; + MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder(originalMatchQueryBuilder); + BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); boolQueryBuilder.should( createSemanticSubQuery( indexInformation.getInferenceIndices(), 
matchQueryBuilder.fieldName(), - (String) matchQueryBuilder.value(), - matchQueryBuilder.boost(), - matchQueryBuilder.queryName() + (String) matchQueryBuilder.value() ) ); boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), matchQueryBuilder)); + boolQueryBuilder.boost(queryBuilder.boost()); + boolQueryBuilder.queryName(queryBuilder.queryName()); return boolQueryBuilder; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java index c504ab0ac727d..bb76ef0be24e9 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryRewriteInterceptor.java @@ -114,17 +114,9 @@ protected QueryBuilder createSubQueryForIndices(Collection indices, Quer return boolQueryBuilder; } - protected QueryBuilder createSemanticSubQuery( - Collection indices, - String fieldName, - String value, - float boost, - String queryName - ) { + protected QueryBuilder createSemanticSubQuery(Collection indices, String fieldName, String value) { BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); boolQueryBuilder.must(new SemanticQueryBuilder(fieldName, value, true)); - boolQueryBuilder.boost(boost); - boolQueryBuilder.queryName(queryName); boolQueryBuilder.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, indices)); return boolQueryBuilder; } From 916b1cca21815a08a35a9484d8a40bd2fb21c7ff Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Tue, 8 Jul 2025 13:24:35 -0400 Subject: [PATCH 43/49] refactor copy match builder to interceptor --- .../index/query/MatchQueryBuilder.java | 19 --------------- .../SemanticMatchQueryRewriteInterceptor.java | 23 ++++++++++++++++++- 2 files changed, 22 insertions(+), 20
deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java index a9d8efbd11bb9..fd704d39ca384 100644 --- a/server/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java @@ -120,25 +120,6 @@ public MatchQueryBuilder(StreamInput in) throws IOException { autoGenerateSynonymsPhraseQuery = in.readBoolean(); } - /** - * Copy constructor from a QueryBuilder. - */ - public MatchQueryBuilder(MatchQueryBuilder queryBuilder) { - this(queryBuilder.fieldName, queryBuilder.value); - - this.operator = queryBuilder.operator(); - this.prefixLength = queryBuilder.prefixLength(); - this.maxExpansions = queryBuilder.maxExpansions(); - this.fuzzyTranspositions = queryBuilder.fuzzyTranspositions(); - this.lenient = queryBuilder.lenient(); - this.zeroTermsQuery = queryBuilder.zeroTermsQuery(); - this.analyzer = queryBuilder.analyzer(); - this.minimumShouldMatch = queryBuilder.minimumShouldMatch(); - this.fuzzyRewrite = queryBuilder.fuzzyRewrite(); - this.fuzziness = queryBuilder.fuzziness(); - this.autoGenerateSynonymsPhraseQuery = queryBuilder.autoGenerateSynonymsPhraseQuery(); - } - @Override protected void doWriteTo(StreamOutput out) throws IOException { out.writeString(fieldName); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java index ea25cb8ee8143..a6599afc66c3f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticMatchQueryRewriteInterceptor.java @@ -49,7 +49,8 @@ protected QueryBuilder 
buildCombinedInferenceAndNonInferenceQuery( ) { assert (queryBuilder instanceof MatchQueryBuilder); MatchQueryBuilder originalMatchQueryBuilder = (MatchQueryBuilder) queryBuilder; - MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder(originalMatchQueryBuilder); + // Create a copy for non-inference fields without boost and _name + MatchQueryBuilder matchQueryBuilder = copyMatchQueryBuilder(originalMatchQueryBuilder); BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); boolQueryBuilder.should( @@ -69,4 +70,24 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( public String getQueryName() { return MatchQueryBuilder.NAME; } + + private MatchQueryBuilder copyMatchQueryBuilder(MatchQueryBuilder queryBuilder) { + MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder(queryBuilder.fieldName(), queryBuilder.value()); + matchQueryBuilder.operator(queryBuilder.operator()); + matchQueryBuilder.prefixLength(queryBuilder.prefixLength()); + matchQueryBuilder.maxExpansions(queryBuilder.maxExpansions()); + matchQueryBuilder.fuzzyTranspositions(queryBuilder.fuzzyTranspositions()); + matchQueryBuilder.lenient(queryBuilder.lenient()); + matchQueryBuilder.zeroTermsQuery(queryBuilder.zeroTermsQuery()); + matchQueryBuilder.analyzer(queryBuilder.analyzer()); + matchQueryBuilder.minimumShouldMatch(queryBuilder.minimumShouldMatch()); + matchQueryBuilder.fuzzyRewrite(queryBuilder.fuzzyRewrite()); + + if (queryBuilder.fuzziness() != null) { + matchQueryBuilder.fuzziness(queryBuilder.fuzziness()); + } + + matchQueryBuilder.autoGenerateSynonymsPhraseQuery(queryBuilder.autoGenerateSynonymsPhraseQuery()); + return matchQueryBuilder; + } } From 8ddda3cbecf883ca383ab82358a797619835344c Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 9 Jul 2025 16:01:14 +0000 Subject: [PATCH 44/49] [CI] Auto commit changes from spotless --- ...icSparseVectorQueryRewriteInterceptor.java | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) 
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java index ce67776e89dc3..c85a21f10301d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticSparseVectorQueryRewriteInterceptor.java @@ -83,14 +83,19 @@ protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery( Map> inferenceIdsIndices = indexInformation.getInferenceIdsIndices(); BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), new SparseVectorQueryBuilder( - sparseVectorQueryBuilder.getFieldName(), - sparseVectorQueryBuilder.getQueryVectors(), - sparseVectorQueryBuilder.getInferenceId(), - sparseVectorQueryBuilder.getQuery(), - sparseVectorQueryBuilder.shouldPruneTokens(), - sparseVectorQueryBuilder.getTokenPruningConfig() - ))); + boolQueryBuilder.should( + createSubQueryForIndices( + indexInformation.nonInferenceIndices(), + new SparseVectorQueryBuilder( + sparseVectorQueryBuilder.getFieldName(), + sparseVectorQueryBuilder.getQueryVectors(), + sparseVectorQueryBuilder.getInferenceId(), + sparseVectorQueryBuilder.getQuery(), + sparseVectorQueryBuilder.shouldPruneTokens(), + sparseVectorQueryBuilder.getTokenPruningConfig() + ) + ) + ); // We always perform nested subqueries on semantic_text fields, to support // sparse_vector queries using query vectors. 
for (String inferenceId : inferenceIdsIndices.keySet()) { From 5dcfc1b3785551275ca229659ede89aa69332766 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 11 Jul 2025 17:11:38 -0400 Subject: [PATCH 45/49] fix unit tests --- .../SemanticKnnVectorQueryRewriteInterceptorTests.java | 6 +++++- .../query/SemanticMatchQueryRewriteInterceptorTests.java | 2 +- .../SemanticSparseVectorQueryRewriteInterceptorTests.java | 6 +++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java index 83404a2ca45f9..3238e94f34ba3 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java @@ -54,7 +54,7 @@ public void cleanup() { } public void testKnnQueryWithVectorBuilderIsInterceptedAndRewritten() throws IOException { - float boost = randomFloat() * 5; + float boost = randomFloatBetween(1, 10, true); String queryName = randomAlphaOfLength(5); Map inferenceFields = Map.of( FIELD_NAME, @@ -69,6 +69,8 @@ public void testKnnQueryWithVectorBuilderIsInterceptedAndRewritten() throws IOEx } public void testKnnWithQueryBuilderWithoutInferenceIdIsInterceptedAndRewritten() throws IOException { + float boost = randomFloatBetween(1, 10, true); + String queryName = randomAlphaOfLength(5); Map inferenceFields = Map.of( FIELD_NAME, new InferenceFieldMetadata(index.getName(), INFERENCE_ID, new String[] { FIELD_NAME }, null) @@ -76,6 +78,8 @@ public void testKnnWithQueryBuilderWithoutInferenceIdIsInterceptedAndRewritten() QueryRewriteContext context = createQueryRewriteContext(inferenceFields); QueryVectorBuilder queryVectorBuilder = new 
TextEmbeddingQueryVectorBuilder(null, QUERY); KnnVectorQueryBuilder original = new KnnVectorQueryBuilder(FIELD_NAME, queryVectorBuilder, 10, 100, null); + original.boost(boost); + original.queryName(queryName); testRewrittenInferenceQuery(context, original); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java index 01f300000d211..22a91613b1498 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java @@ -94,7 +94,7 @@ public void testBoostAndQueryNameInMatchQueryRewrite() throws IOException { rewritten instanceof InterceptedQueryBuilderWrapper ); InterceptedQueryBuilderWrapper intercepted = (InterceptedQueryBuilderWrapper) rewritten; - assertEquals(5.0, intercepted.boost(), 0.0f); + assertEquals(BOOST, intercepted.boost(), 0.0f); assertEquals(QUERY_NAME, intercepted.queryName()); assertTrue(intercepted.queryBuilder instanceof SemanticQueryBuilder); SemanticQueryBuilder semanticQueryBuilder = (SemanticQueryBuilder) intercepted.queryBuilder; diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java index 533df90ee7d0d..3ee9ae49ff30e 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java @@ -52,7 +52,7 @@ public void cleanup() { } public void 
testSparseVectorQueryOnInferenceFieldIsInterceptedAndRewritten() throws IOException { - float boost = randomFloat() * 5; + float boost = randomFloatBetween(1, 10, true); String queryName = randomAlphaOfLength(5); Map inferenceFields = Map.of( FIELD_NAME, @@ -66,12 +66,16 @@ public void testSparseVectorQueryOnInferenceFieldIsInterceptedAndRewritten() thr } public void testSparseVectorQueryOnInferenceFieldWithoutInferenceIdIsInterceptedAndRewritten() throws IOException { + float boost = randomFloatBetween(1, 10, true); + String queryName = randomAlphaOfLength(5); Map inferenceFields = Map.of( FIELD_NAME, new InferenceFieldMetadata(index.getName(), "inferenceId", new String[] { FIELD_NAME }, null) ); QueryRewriteContext context = createQueryRewriteContext(inferenceFields); QueryBuilder original = new SparseVectorQueryBuilder(FIELD_NAME, null, QUERY); + original.boost(boost); + original.queryName(queryName); testRewrittenInferenceQuery(context, original); } From 469f598b8278e8f30b680843ef2ef46a76e7c4d7 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 11 Jul 2025 17:24:13 -0400 Subject: [PATCH 46/49] update yaml tests --- .../test/inference/45_semantic_text_match.yml | 8 ++++---- .../test/inference/46_semantic_text_sparse_vector.yml | 8 ++++---- .../rest-api-spec/test/inference/47_semantic_text_knn.yml | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml index 8f6c318fb4d7b..4980904b8c222 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml @@ -322,7 +322,7 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { 
hits.hits.0._score: { value: 5.700229E18, error: 1e15 } } + - close_to: { hits.hits.0._score: { value: 5.700229E18, error: 1e-7 } } - not_exists: hits.hits.0.matched_queries - do: @@ -341,7 +341,7 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e15 } } + - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e-7 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } --- @@ -406,7 +406,7 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - match: { hits.hits.1._id: "doc_2" } - - close_to: { hits.hits.0._score: { value: 1.1140361E19, error: 1e15 } } + - close_to: { hits.hits.0._score: { value: 1.1140361E19, error: 1e-7 } } - not_exists: hits.hits.0.matched_queries - close_to: { hits.hits.1._score: { value: 0.2876821, error: 1e-7 } } - not_exists: hits.hits.1.matched_queries @@ -428,7 +428,7 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - match: { hits.hits.1._id: "doc_2" } - - close_to: { hits.hits.0._score: { value: 5.5701804E19, error: 1e15 } } + - close_to: { hits.hits.0._score: { value: 5.5701804E19, error: 1e-6 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } - close_to: { hits.hits.1._score: { value: 1.4384103, error: 1e-7 } } - match: { hits.hits.1.matched_queries: [ "i-like-naming-my-queries" ] } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml index 3e99af024e4f7..448fcc8907bf3 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml @@ -291,7 +291,7 @@ 
setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 5.700229E18, error: 1e15 } } + - close_to: { hits.hits.0._score: { value: 5.700229E18, error: 1e-7 } } - not_exists: hits.hits.0.matched_queries - do: @@ -310,7 +310,7 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e15 } } + - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e-7 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } --- @@ -376,7 +376,7 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - match: { hits.hits.1._id: "doc_2" } - - close_to: { hits.hits.0._score: { value: 5.700229E18, error: 1e15 } } + - close_to: { hits.hits.0._score: { value: 5.700229E18, error: 1e-7 } } - not_exists: hits.hits.0.matched_queries - close_to: { hits.hits.1._score: { value: 1.3455845E10, error: 1e-7 } } - not_exists: hits.hits.1.matched_queries @@ -399,7 +399,7 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - match: { hits.hits.1._id: "doc_2" } - - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e15 } } + - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e-7 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } - close_to: { hits.hits.1._score: { value: 6.7279225E10, error: 1e-7 } } - match: { hits.hits.1.matched_queries: [ "i-like-naming-my-queries" ] } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml index 19b2411209ad9..e3428d541c583 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml +++ 
b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml @@ -452,7 +452,7 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 0.9984111, error: 1e15 } } + - close_to: { hits.hits.0._score: { value: 0.9981499, error: 1e-7 } } - not_exists: hits.hits.0.matched_queries - do: From 375ae363cb07e090baf422cdcfbcc6b9c7a1fac7 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Fri, 11 Jul 2025 17:26:03 -0400 Subject: [PATCH 47/49] fix match yaml test --- .../rest-api-spec/test/inference/45_semantic_text_match.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml index 4980904b8c222..cc9f5bf65992c 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml @@ -428,7 +428,7 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - match: { hits.hits.1._id: "doc_2" } - - close_to: { hits.hits.0._score: { value: 5.5701804E19, error: 1e-6 } } + - close_to: { hits.hits.0._score: { value: 5.5701804E19, error: 1e-7 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } - close_to: { hits.hits.1._score: { value: 1.4384103, error: 1e-7 } } - match: { hits.hits.1.matched_queries: [ "i-like-naming-my-queries" ] } From 768b8f6d3318a45cf4e09258b8c9e8e78cabb9e7 Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 16 Jul 2025 10:25:33 -0400 Subject: [PATCH 48/49] fix yaml tests with 4 digits error margin --- .../test/inference/45_semantic_text_match.yml | 64 ++++--------- .../46_semantic_text_sparse_vector.yml | 87 
+++-------------- .../test/inference/47_semantic_text_knn.yml | 93 ++++--------------- 3 files changed, 46 insertions(+), 198 deletions(-) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml index cc9f5bf65992c..3898eb7de7c29 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/45_semantic_text_match.yml @@ -287,25 +287,13 @@ setup: - skip: features: [ "headers", "close_to" ] - - do: - indices.create: - index: test-sparse-index-random - body: - settings: - number_of_shards: 1 - number_of_replicas: 0 - mappings: - properties: - inference_field: - type: semantic_text - inference_id: sparse-inference-id - - do: index: - index: test-sparse-index-random + index: test-sparse-index id: doc_1 body: inference_field: [ "It was a beautiful game", "Very competitive" ] + non_inference_field: "non inference test" refresh: true - do: @@ -313,7 +301,7 @@ setup: # Force JSON content type so that we use a parser that interprets the floating-point score as a double Content-Type: application/json search: - index: test-sparse-index-random + index: test-sparse-index body: query: match: @@ -322,7 +310,7 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 5.700229E18, error: 1e-7 } } + - close_to: { hits.hits.0._score: { value: 5.700229E18, error: 1e15 } } - not_exists: hits.hits.0.matched_queries - do: @@ -330,7 +318,7 @@ setup: # Force JSON content type so that we use a parser that interprets the floating-point score as a double Content-Type: application/json search: - index: test-sparse-index-random + index: test-sparse-index body: query: match: @@ -341,7 +329,7 @@ setup: - match: { 
hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e-7 } } + - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e16 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } --- @@ -353,42 +341,22 @@ setup: - skip: features: [ "headers", "close_to" ] - - do: - indices.create: - index: index-with-semantic-field - body: - settings: - number_of_shards: 1 - number_of_replicas: 0 - mappings: - properties: - inference_field: - type: semantic_text - inference_id: sparse-inference-id - - - do: - indices.create: - index: index-with-text-field - body: - mappings: - properties: - inference_field: - type: text - - do: index: - index: index-with-semantic-field + index: test-sparse-index id: doc_1 body: inference_field: [ "It was a beautiful game", "Very competitive" ] + non_inference_field: "non inference test" refresh: true - do: index: - index: index-with-text-field + index: test-text-only-index id: doc_2 body: inference_field: [ "It was a beautiful game", "Very competitive" ] + non_inference_field: "non inference test" refresh: true - do: @@ -396,7 +364,7 @@ setup: # Force JSON content type so that we use a parser that interprets the floating-point score as a double Content-Type: application/json search: - index: index-with-semantic-field,index-with-text-field + index: test-sparse-index,test-text-only-index body: query: match: @@ -406,9 +374,9 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - match: { hits.hits.1._id: "doc_2" } - - close_to: { hits.hits.0._score: { value: 1.1140361E19, error: 1e-7 } } + - close_to: { hits.hits.0._score: { value: 1.1140361E19, error: 1e16 } } - not_exists: hits.hits.0.matched_queries - - close_to: { hits.hits.1._score: { value: 0.2876821, error: 1e-7 } } + - close_to: { hits.hits.1._score: { value: 0.2876821, error: 1e-4 } } - not_exists: hits.hits.1.matched_queries - do: @@ -416,7 +384,7 @@ setup: # 
Force JSON content type so that we use a parser that interprets the floating-point score as a double Content-Type: application/json search: - index: index-with-semantic-field,index-with-text-field + index: test-sparse-index,test-text-only-index body: query: match: @@ -428,7 +396,7 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - match: { hits.hits.1._id: "doc_2" } - - close_to: { hits.hits.0._score: { value: 5.5701804E19, error: 1e-7 } } + - close_to: { hits.hits.0._score: { value: 5.5701804E19, error: 1e16 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } - - close_to: { hits.hits.1._score: { value: 1.4384103, error: 1e-7 } } + - close_to: { hits.hits.1._score: { value: 1.4384103, error: 1e-4 } } - match: { hits.hits.1.matched_queries: [ "i-like-naming-my-queries" ] } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml index 448fcc8907bf3..cc67b9235f0b4 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/46_semantic_text_sparse_vector.yml @@ -256,42 +256,21 @@ setup: - skip: features: [ "headers", "close_to" ] - - do: - indices.create: - index: test-sparse-index-random - body: - settings: - number_of_shards: 1 - number_of_replicas: 0 - mappings: - properties: - inference_field: - type: semantic_text - inference_id: sparse-inference-id - - - do: - index: - index: test-sparse-index-random - id: doc_1 - body: - inference_field: [ "It was a beautiful game", "Very competitive" ] - refresh: true - - do: headers: # Force JSON content type so that we use a parser that interprets the floating-point score as a double Content-Type: application/json search: - index: 
test-sparse-index-random + index: test-semantic-text-index body: query: sparse_vector: field: inference_field - query: "soccer" + query: "inference test" - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 5.700229E18, error: 1e-7 } } + - close_to: { hits.hits.0._score: { value: 3.7837332E17, error: 1e14 } } - not_exists: hits.hits.0.matched_queries - do: @@ -299,18 +278,18 @@ setup: # Force JSON content type so that we use a parser that interprets the floating-point score as a double Content-Type: application/json search: - index: test-sparse-index-random + index: test-semantic-text-index body: query: sparse_vector: field: inference_field - query: "soccer" + query: "inference test" boost: 5.0 _name: i-like-naming-my-queries - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e-7 } } + - close_to: { hits.hits.0._score: { value: 1.8918664E18, error: 1e15 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } --- @@ -322,63 +301,25 @@ setup: - skip: features: [ "headers", "close_to" ] - - do: - indices.create: - index: index-with-semantic-field - body: - settings: - number_of_shards: 1 - number_of_replicas: 0 - mappings: - properties: - inference_field: - type: semantic_text - inference_id: sparse-inference-id - - - do: - indices.create: - index: index-with-sparse-field - body: - mappings: - properties: - inference_field: - type: sparse_vector - - - do: - index: - index: index-with-semantic-field - id: doc_1 - body: - inference_field: [ "It was a beautiful game", "Very competitive" ] - refresh: true - - - do: - index: - index: index-with-sparse-field - id: doc_2 - body: - inference_field: { "feature_0": 1, "feature_1": 2, "feature_2": 3, "feature_3": 4, "feature_4": 5 } - refresh: true - - do: headers: # Force JSON content type so that we use a parser that interprets the floating-point score as a 
double Content-Type: application/json search: - index: index-with-semantic-field,index-with-sparse-field + index: test-semantic-text-index,test-sparse-vector-index body: query: sparse_vector: field: inference_field - query: "soccer" + query: "inference test" inference_id: sparse-inference-id - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - match: { hits.hits.1._id: "doc_2" } - - close_to: { hits.hits.0._score: { value: 5.700229E18, error: 1e-7 } } + - close_to: { hits.hits.0._score: { value: 3.7837332E17, error: 1e14 } } - not_exists: hits.hits.0.matched_queries - - close_to: { hits.hits.1._score: { value: 1.3455845E10, error: 1e-7 } } + - close_to: { hits.hits.1._score: { value: 7.314424E8, error: 1e5 } } - not_exists: hits.hits.1.matched_queries - do: @@ -386,12 +327,12 @@ setup: # Force JSON content type so that we use a parser that interprets the floating-point score as a double Content-Type: application/json search: - index: index-with-semantic-field,index-with-sparse-field + index: test-semantic-text-index,test-sparse-vector-index body: query: sparse_vector: field: inference_field - query: "soccer" + query: "inference test" inference_id: sparse-inference-id boost: 5.0 _name: i-like-naming-my-queries @@ -399,7 +340,7 @@ setup: - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - match: { hits.hits.1._id: "doc_2" } - - close_to: { hits.hits.0._score: { value: 2.8501142E19, error: 1e-7 } } + - close_to: { hits.hits.0._score: { value: 1.8918664E18, error: 1e15 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } - - close_to: { hits.hits.1._score: { value: 6.7279225E10, error: 1e-7 } } + - close_to: { hits.hits.1._score: { value: 3.657212E9, error: 1e6 } } - match: { hits.hits.1.matched_queries: [ "i-like-naming-my-queries" ] } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml 
b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml index e3428d541c583..d49e3a63848e3 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/47_semantic_text_knn.yml @@ -413,33 +413,12 @@ setup: - skip: features: [ "headers", "close_to" ] - - do: - indices.create: - index: test-dense-index-random - body: - settings: - number_of_shards: 1 - number_of_replicas: 0 - mappings: - properties: - inference_field: - type: semantic_text - inference_id: dense-inference-id - - - do: - index: - index: test-dense-index-random - id: doc_1 - body: - inference_field: [ "It was a beautiful game", "Very competitive" ] - refresh: true - - do: headers: # Force JSON content type so that we use a parser that interprets the floating-point score as a double Content-Type: application/json search: - index: test-dense-index-random + index: test-semantic-text-index body: query: knn: @@ -448,11 +427,11 @@ setup: num_candidates: 100 query_vector_builder: text_embedding: - model_text: soccer + model_text: test - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { value: 0.9981499, error: 1e-7 } } + - close_to: { hits.hits.0._score: { value: 0.9990483, error: 1e-4 } } - not_exists: hits.hits.0.matched_queries - do: @@ -460,7 +439,7 @@ setup: # Force JSON content type so that we use a parser that interprets the floating-point score as a double Content-Type: application/json search: - index: test-dense-index-random + index: test-semantic-text-index body: query: knn: @@ -469,13 +448,13 @@ setup: num_candidates: 100 query_vector_builder: text_embedding: - model_text: soccer + model_text: test boost: 5.0 _name: i-like-naming-my-queries - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } - - close_to: { hits.hits.0._score: { 
value: 4.9907494, error: 1e-7 } } + - close_to: { hits.hits.0._score: { value: 4.9952416, error: 1e-3 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } --- @@ -487,52 +466,12 @@ setup: - skip: features: [ "headers", "close_to" ] - - do: - indices.create: - index: index-with-semantic-field - body: - settings: - number_of_shards: 1 - number_of_replicas: 0 - mappings: - properties: - inference_field: - type: semantic_text - inference_id: dense-inference-id - - - do: - indices.create: - index: index-with-dense-field - body: - mappings: - properties: - inference_field: - type: dense_vector - dims: 10 - similarity: cosine - - - do: - index: - index: index-with-semantic-field - id: doc_1 - body: - inference_field: [ "It was a beautiful game", "Very competitive" ] - refresh: true - - - do: - index: - index: index-with-dense-field - id: doc_2 - body: - inference_field: [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ] - refresh: true - - do: headers: # Force JSON content type so that we use a parser that interprets the floating-point score as a double Content-Type: application/json search: - index: index-with-semantic-field,index-with-dense-field + index: test-semantic-text-index,test-dense-vector-index body: query: knn: @@ -541,15 +480,15 @@ setup: num_candidates: 100 query_vector_builder: text_embedding: - model_text: soccer + model_text: test model_id: dense-inference-id - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - - match: { hits.hits.1._id: "doc_2" } - - close_to: { hits.hits.0._score: { value: 0.9981499, error: 1e-7 } } + - match: { hits.hits.1._id: "doc_3" } + - close_to: { hits.hits.0._score: { value: 0.9990483, error: 1e-4 } } - not_exists: hits.hits.0.matched_queries - - close_to: { hits.hits.1._score: { value: 0.9428395, error: 1e-7 } } + - close_to: { hits.hits.1._score: { value: 0.9439374, error: 1e-4 } } - not_exists: hits.hits.1.matched_queries - do: @@ -557,7 +496,7 @@ setup: # Force JSON content type so that we use a 
parser that interprets the floating-point score as a double Content-Type: application/json search: - index: index-with-semantic-field,index-with-dense-field + index: test-semantic-text-index,test-dense-vector-index body: query: knn: @@ -566,15 +505,15 @@ setup: num_candidates: 100 query_vector_builder: text_embedding: - model_text: soccer + model_text: test model_id: dense-inference-id boost: 5.0 _name: i-like-naming-my-queries - match: { hits.total.value: 2 } - match: { hits.hits.0._id: "doc_1" } - - match: { hits.hits.1._id: "doc_2" } - - close_to: { hits.hits.0._score: { value: 4.9907494, error: 1e-7 } } + - match: { hits.hits.1._id: "doc_3" } + - close_to: { hits.hits.0._score: { value: 4.9952416, error: 1e-3 } } - match: { hits.hits.0.matched_queries: [ "i-like-naming-my-queries" ] } - - close_to: { hits.hits.1._score: { value: 4.7141976, error: 1e-7 } } + - close_to: { hits.hits.1._score: { value: 4.719687, error: 1e-3 } } - match: { hits.hits.1.matched_queries: [ "i-like-naming-my-queries" ] } From 98cba31f5b75174186fdcd6d8989f692eac5cddf Mon Sep 17 00:00:00 2001 From: Samiul Monir Date: Wed, 16 Jul 2025 10:40:53 -0400 Subject: [PATCH 49/49] unit tests are now more randomized --- ...KnnVectorQueryRewriteInterceptorTests.java | 24 ++++++++++++------- ...nticMatchQueryRewriteInterceptorTests.java | 11 +++------ ...rseVectorQueryRewriteInterceptorTests.java | 24 ++++++++++++------- 3 files changed, 35 insertions(+), 24 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java index 3238e94f34ba3..1f0b56e3d6848 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticKnnVectorQueryRewriteInterceptorTests.java @@ -54,8 +54,6 @@ public void cleanup() { } public void testKnnQueryWithVectorBuilderIsInterceptedAndRewritten() throws IOException { - float boost = randomFloatBetween(1, 10, true); - String queryName = randomAlphaOfLength(5); Map inferenceFields = Map.of( FIELD_NAME, new InferenceFieldMetadata(index.getName(), INFERENCE_ID, new String[] { FIELD_NAME }, null) @@ -63,14 +61,18 @@ public void testKnnQueryWithVectorBuilderIsInterceptedAndRewritten() throws IOEx QueryRewriteContext context = createQueryRewriteContext(inferenceFields); QueryVectorBuilder queryVectorBuilder = new TextEmbeddingQueryVectorBuilder(INFERENCE_ID, QUERY); KnnVectorQueryBuilder original = new KnnVectorQueryBuilder(FIELD_NAME, queryVectorBuilder, 10, 100, null); - original.boost(boost); - original.queryName(queryName); + if (randomBoolean()) { + float boost = randomFloatBetween(1, 10, randomBoolean()); + original.boost(boost); + } + if (randomBoolean()) { + String queryName = randomAlphaOfLength(5); + original.queryName(queryName); + } testRewrittenInferenceQuery(context, original); } public void testKnnWithQueryBuilderWithoutInferenceIdIsInterceptedAndRewritten() throws IOException { - float boost = randomFloatBetween(1, 10, true); - String queryName = randomAlphaOfLength(5); Map inferenceFields = Map.of( FIELD_NAME, new InferenceFieldMetadata(index.getName(), INFERENCE_ID, new String[] { FIELD_NAME }, null) @@ -78,8 +80,14 @@ public void testKnnWithQueryBuilderWithoutInferenceIdIsInterceptedAndRewritten() QueryRewriteContext context = createQueryRewriteContext(inferenceFields); QueryVectorBuilder queryVectorBuilder = new TextEmbeddingQueryVectorBuilder(null, QUERY); KnnVectorQueryBuilder original = new KnnVectorQueryBuilder(FIELD_NAME, queryVectorBuilder, 10, 100, null); - original.boost(boost); - original.queryName(queryName); + if (randomBoolean()) { + float boost = 
randomFloatBetween(1, 10, randomBoolean()); + original.boost(boost); + } + if (randomBoolean()) { + String queryName = randomAlphaOfLength(5); + original.queryName(queryName); + } testRewrittenInferenceQuery(context, original); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java index 22a91613b1498..b58547e1a92c7 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticMatchQueryRewriteInterceptorTests.java @@ -87,7 +87,9 @@ public void testBoostAndQueryNameInMatchQueryRewrite() throws IOException { new InferenceFieldMetadata(index.getName(), "inferenceId", new String[] { FIELD_NAME }, null) ); QueryRewriteContext context = createQueryRewriteContext(inferenceFields); - QueryBuilder original = createTestQueryBuilderWithBoostAndQueryName(); + QueryBuilder original = createTestQueryBuilder(); + original.boost(BOOST); + original.queryName(QUERY_NAME); QueryBuilder rewritten = original.rewrite(context); assertTrue( "Expected query to be intercepted, but was [" + rewritten.getClass().getName() + "]", @@ -106,13 +108,6 @@ private MatchQueryBuilder createTestQueryBuilder() { return new MatchQueryBuilder(FIELD_NAME, VALUE); } - private MatchQueryBuilder createTestQueryBuilderWithBoostAndQueryName() { - MatchQueryBuilder queryBuilder = createTestQueryBuilder(); - queryBuilder.boost(BOOST); - queryBuilder.queryName(QUERY_NAME); - return queryBuilder; - } - private QueryRewriteContext createQueryRewriteContext(Map inferenceFields) { IndexMetadata indexMetadata = IndexMetadata.builder(index.getName()) .settings( diff --git 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java index 3ee9ae49ff30e..401b7085e2cb5 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/index/query/SemanticSparseVectorQueryRewriteInterceptorTests.java @@ -52,30 +52,38 @@ public void cleanup() { } public void testSparseVectorQueryOnInferenceFieldIsInterceptedAndRewritten() throws IOException { - float boost = randomFloatBetween(1, 10, true); - String queryName = randomAlphaOfLength(5); Map inferenceFields = Map.of( FIELD_NAME, new InferenceFieldMetadata(index.getName(), "inferenceId", new String[] { FIELD_NAME }, null) ); QueryRewriteContext context = createQueryRewriteContext(inferenceFields); QueryBuilder original = new SparseVectorQueryBuilder(FIELD_NAME, INFERENCE_ID, QUERY); - original.boost(boost); - original.queryName(queryName); + if (randomBoolean()) { + float boost = randomFloatBetween(1, 10, randomBoolean()); + original.boost(boost); + } + if (randomBoolean()) { + String queryName = randomAlphaOfLength(5); + original.queryName(queryName); + } testRewrittenInferenceQuery(context, original); } public void testSparseVectorQueryOnInferenceFieldWithoutInferenceIdIsInterceptedAndRewritten() throws IOException { - float boost = randomFloatBetween(1, 10, true); - String queryName = randomAlphaOfLength(5); Map inferenceFields = Map.of( FIELD_NAME, new InferenceFieldMetadata(index.getName(), "inferenceId", new String[] { FIELD_NAME }, null) ); QueryRewriteContext context = createQueryRewriteContext(inferenceFields); QueryBuilder original = new SparseVectorQueryBuilder(FIELD_NAME, null, QUERY); - original.boost(boost); - original.queryName(queryName); + if (randomBoolean()) { + 
float boost = randomFloatBetween(1, 10, randomBoolean()); + original.boost(boost); + } + if (randomBoolean()) { + String queryName = randomAlphaOfLength(5); + original.queryName(queryName); + } testRewrittenInferenceQuery(context, original); }