From 7ad7e92e3258e2cd9f9d47deed2d89cbe83cf0df Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Mon, 19 May 2025 22:34:17 +0100 Subject: [PATCH 1/5] adding rank_feature dsl query docs Signed-off-by: Anton Rubin --- _query-dsl/specialized/index.md | 2 +- _query-dsl/specialized/rank-feature.md | 547 +++++++++++++++++++++++++ 2 files changed, 548 insertions(+), 1 deletion(-) create mode 100644 _query-dsl/specialized/rank-feature.md diff --git a/_query-dsl/specialized/index.md b/_query-dsl/specialized/index.md index d28451cfa8d..fd89887e397 100644 --- a/_query-dsl/specialized/index.md +++ b/_query-dsl/specialized/index.md @@ -22,7 +22,7 @@ OpenSearch supports the following specialized queries: - `percolate`: Finds queries (stored as documents) that match the provided document. -- `rank_feature`: Calculates scores based on the values of numeric features. This query can skip non-competitive hits. +- [`rank_feature`]({{site.url}}{{site.baseurl}}/query-dsl/specialized/rank-feature/): Calculates scores based on the values of numeric features. This query can skip non-competitive hits. - `script`: Uses a script as a filter. diff --git a/_query-dsl/specialized/rank-feature.md b/_query-dsl/specialized/rank-feature.md new file mode 100644 index 00000000000..73c13f4460a --- /dev/null +++ b/_query-dsl/specialized/rank-feature.md @@ -0,0 +1,547 @@ +--- +layout: default +title: Rank feature +parent: Specialized queries +nav_order: 75 +--- + +# Rank feature + +Use the `rank_feature` query to boost document scores based on numeric values in the document, such as relevance scores, popularity, or freshness. This query is ideal if you want to fine-tune relevance ranking using numerical features. Unlike [full-text queries]({{site.url}}{{site.baseurl}}/query-dsl/full-text/index/), `rank_feature` focuses solely on a numeric signal, and is most effective when combined with other queries in a compound query like `bool`. 
+ +The `rank_feature` query expects the target field to be mapped as a [`rank_feature` field type]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/rank/). This enables internally optimized scoring for fast and efficient boosting. + +The score impact depends on the field value and the optional `saturation`, `log` or `sigmoid` function used. + +## Parameters + +| Parameter | Required/Optional | Description | +| ----------------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------ | +| `field` | Required | A `rank_feature` or `rank_features` field that contributes to document scoring.| +| `boost` | Optional | A multiplier applied to the score. Default is `1.0`. Values between 0 and 1 reduce the score, values above 1 amplify it. | +| `saturation` | Optional | Applies a saturation function to the feature value. Boost grows with value but levels off beyond the `pivot`. (Default function if no other function is provided)| +| `log` | Optional | Uses a logarithmic scoring function based on the field value. Best for large ranges of values.| +| `sigmoid` | Optional | Applies a sigmoid (S-shaped) curve to score impact, controlled by `pivot` and `exponent`.| +| `positive_score_impact` | Optional | When `false`, lower values score higher. Useful for features like price where smaller is better. Defined as part of the mapping. (Default is `true`)| + +Only one function out of `saturation`, `log`, or `sigmoid` may be used at a time. 
+{: .note} + +## Create an index with rank feature field + +Define an index with a `rank_feature` field to represent a signal like `popularity`: + +```json +PUT /products +{ + "mappings": { + "properties": { + "title": { "type": "text" }, + "popularity": { "type": "rank_feature" } + } + } +} +``` +{% include copy-curl.html %} + +## Index example documents + +Add sample products with varying popularity values: + +```json +POST /products/_bulk +{ "index": { "_id": 1 } } +{ "title": "Wireless Earbuds", "popularity": 1 } +{ "index": { "_id": 2 } } +{ "title": "Bluetooth Speaker", "popularity": 10 } +{ "index": { "_id": 3 } } +{ "title": "Portable Charger", "popularity": 25 } +{ "index": { "_id": 4 } } +{ "title": "Smartwatch", "popularity": 50 } +{ "index": { "_id": 5 } } +{ "title": "Noise Cancelling Headphones", "popularity": 100 } +{ "index": { "_id": 6 } } +{ "title": "Gaming Laptop", "popularity": 250 } +{ "index": { "_id": 7 } } +{ "title": "4K Monitor", "popularity": 500 } +``` +{% include copy-curl.html %} + +## Basic rank feature query + +You can boost results based on the `popularity` score using `rank_feature`: + +```json +POST /products/_search +{ + "query": { + "rank_feature": { + "field": "popularity" + } + } +} +``` +{% include copy-curl.html %} + +This query alone does not perform filtering, rather it scores all documents based on the value of `popularity`. Higher values yield higher scores: + +```json +{ + ... 
+ "hits": { + "total": { + "value": 7, + "relation": "eq" + }, + "max_score": 0.9252834, + "hits": [ + { + "_index": "products", + "_id": "7", + "_score": 0.9252834, + "_source": { + "title": "4K Monitor", + "popularity": 500 + } + }, + { + "_index": "products", + "_id": "6", + "_score": 0.86095566, + "_source": { + "title": "Gaming Laptop", + "popularity": 250 + } + }, + { + "_index": "products", + "_id": "5", + "_score": 0.71237755, + "_source": { + "title": "Noise Cancelling Headphones", + "popularity": 100 + } + }, + { + "_index": "products", + "_id": "4", + "_score": 0.5532503, + "_source": { + "title": "Smartwatch", + "popularity": 50 + } + }, + { + "_index": "products", + "_id": "3", + "_score": 0.38240916, + "_source": { + "title": "Portable Charger", + "popularity": 25 + } + }, + { + "_index": "products", + "_id": "2", + "_score": 0.19851118, + "_source": { + "title": "Bluetooth Speaker", + "popularity": 10 + } + }, + { + "_index": "products", + "_id": "1", + "_score": 0.024169207, + "_source": { + "title": "Wireless Earbuds", + "popularity": 1 + } + } + ] + } +} +``` + +## Combine with full-text search + +To filter relevant results and boost them based on popularity use the following request: + +```json +POST /products/_search +{ + "query": { + "bool": { + "must": { + "match": { + "title": "headphones" + } + }, + "should": { + "rank_feature": { + "field": "popularity" + } + } + } + } +} +``` +{% include copy-curl.html %} + +This ranks all documents matching "headphones" and boosts those with higher popularity. + +## Boost parameter + +The `boost` parameter allows you to scale the score contribution of the rank_feature clause. It's especially useful in compound queries such as bool, where you want to control the influence of a feature relative to other conditions. 
+ +In the following example, the `bool` query matches documents with the term "headphones" in the `title`, and boosts more popular results with a `rank_feature` clause using a `boost` of `2.0`: + +```json +POST /products/_search +{ + "query": { + "bool": { + "must": { + "match": { + "title": "headphones" + } + }, + "should": { + "rank_feature": { + "field": "popularity", + "boost": 2.0 + } + } + } + } +} +``` +{% include copy-curl.html %} + +This will double the contribution of the rank_feature score in the overall document score. A `boost` less than `1.0` would down-weight its influence. + +## Configure score function + +By default, the `rank_feature` query uses a `saturation` function with a `pivot` value derived from the field. You can explicitly control this with the `saturation`, `log` or `sigmoid` functions. + +### Saturation function + +The `saturation` function is the default scoring method used in `rank_feature` queries. It assigns higher scores to documents with larger feature values, but the increase in score becomes more gradual as the value exceeds a specified pivot. This is useful when you want to give diminishing returns to very large values, for example, boosting `popularity` while avoiding over-rewarding extremely high numbers. The formulae for calculating score is: `value of the rank_feature field / (value of the rank_feature field + pivot)`. The produced score is always between `0` and `1`. + +```json +POST /products/_search +{ + "query": { + "rank_feature": { + "field": "popularity", + "saturation": { + "pivot": 50 + } + } + } +} +``` +{% include copy-curl.html %} + +The `pivot` defines the point at which scoring growth slows down. Values higher than `pivot` still increase the score, but with diminishing returns, as can be seen in the returned hits: + +```json +{ + ... 
+ "hits": { + "total": { + "value": 7, + "relation": "eq" + }, + "max_score": 0.9090909, + "hits": [ + { + "_index": "products", + "_id": "7", + "_score": 0.9090909, + "_source": { + "title": "4K Monitor", + "popularity": 500 + } + }, + { + "_index": "products", + "_id": "6", + "_score": 0.8333333, + "_source": { + "title": "Gaming Laptop", + "popularity": 250 + } + }, + { + "_index": "products", + "_id": "5", + "_score": 0.6666666, + "_source": { + "title": "Noise Cancelling Headphones", + "popularity": 100 + } + }, + { + "_index": "products", + "_id": "4", + "_score": 0.5, + "_source": { + "title": "Smartwatch", + "popularity": 50 + } + }, + { + "_index": "products", + "_id": "3", + "_score": 0.3333333, + "_source": { + "title": "Portable Charger", + "popularity": 25 + } + }, + { + "_index": "products", + "_id": "2", + "_score": 0.16666669, + "_source": { + "title": "Bluetooth Speaker", + "popularity": 10 + } + }, + { + "_index": "products", + "_id": "1", + "_score": 0.019607842, + "_source": { + "title": "Wireless Earbuds", + "popularity": 1 + } + } + ] + } +} +``` + +If the pivot is not provided, approximate geometric mean of all rank_feature values in the index is used. + +### Log function + +The log function is helpful when the range of values in your `rank_feature` field varies significantly. It applies a logarithmic scale to the `score`, which reduces the effect of extremely high values and helps normalize scoring across wide value distributions. This is especially useful when a small difference between low values should be more impactful than a large difference between high values. The score is derived using formulae: `log(scaling_factor + rank_feature field)`, see following example: + +```json +POST /products/_search +{ + "query": { + "rank_feature": { + "field": "popularity", + "log": { + "scaling_factor": 2 + } + } + } +} +``` +{% include copy-curl.html %} + +In the example dataset, the `popularity` field ranges from `1` to `500`. 
The `log` function compresses the `score` contribution from large values like `250` and `500`, while still allowing documents with `10` or `25` to have meaningful scores. This is unlike `saturation`, where documents above the pivot rapidly approach the same maximum score.” + +```json +{ + ... + "hits": { + "total": { + "value": 7, + "relation": "eq" + }, + "max_score": 6.2186003, + "hits": [ + { + "_index": "products", + "_id": "7", + "_score": 6.2186003, + "_source": { + "title": "4K Monitor", + "popularity": 500 + } + }, + { + "_index": "products", + "_id": "6", + "_score": 5.529429, + "_source": { + "title": "Gaming Laptop", + "popularity": 250 + } + }, + { + "_index": "products", + "_id": "5", + "_score": 4.624973, + "_source": { + "title": "Noise Cancelling Headphones", + "popularity": 100 + } + }, + { + "_index": "products", + "_id": "4", + "_score": 3.9512436, + "_source": { + "title": "Smartwatch", + "popularity": 50 + } + }, + { + "_index": "products", + "_id": "3", + "_score": 3.295837, + "_source": { + "title": "Portable Charger", + "popularity": 25 + } + }, + { + "_index": "products", + "_id": "2", + "_score": 2.4849067, + "_source": { + "title": "Bluetooth Speaker", + "popularity": 10 + } + }, + { + "_index": "products", + "_id": "1", + "_score": 1.0986123, + "_source": { + "title": "Wireless Earbuds", + "popularity": 1 + } + } + ] + } +} +``` + +### Sigmoid function + +The `sigmoid` function provides a smooth, S-shaped scoring curve which is especially useful when you want to control the steepness and midpoint of the scoring impact. The score is derived using formulae: `rank feature field value^exp / (rank feature field value^exp + pivot^exp)`, see following example: + +```json +POST /products/_search +{ + "query": { + "rank_feature": { + "field": "popularity", + "sigmoid": { + "pivot": 50, + "exponent": 0.5 + } + } + } +} +``` +{% include copy-curl.html %} + +* `pivot` defines the value at which the score is 0.5. 
+* `exponent` controls how steep the curve is. Lower values result in a sharper transition around the pivot. + +The sigmoid function smoothly boosts scores around the `pivot` (`50` in this case), giving moderate preference to values near the pivot while flattening out both high and low extremes: + +```json +{ + ... + "hits": { + "total": { + "value": 7, + "relation": "eq" + }, + "max_score": 0.7597469, + "hits": [ + { + "_index": "products", + "_id": "7", + "_score": 0.7597469, + "_source": { + "title": "4K Monitor", + "popularity": 500 + } + }, + { + "_index": "products", + "_id": "6", + "_score": 0.690983, + "_source": { + "title": "Gaming Laptop", + "popularity": 250 + } + }, + { + "_index": "products", + "_id": "5", + "_score": 0.58578646, + "_source": { + "title": "Noise Cancelling Headphones", + "popularity": 100 + } + }, + { + "_index": "products", + "_id": "4", + "_score": 0.5, + "_source": { + "title": "Smartwatch", + "popularity": 50 + } + }, + { + "_index": "products", + "_id": "3", + "_score": 0.41421357, + "_source": { + "title": "Portable Charger", + "popularity": 25 + } + }, + { + "_index": "products", + "_id": "2", + "_score": 0.309017, + "_source": { + "title": "Bluetooth Speaker", + "popularity": 10 + } + }, + { + "_index": "products", + "_id": "1", + "_score": 0.12389934, + "_source": { + "title": "Wireless Earbuds", + "popularity": 1 + } + } + ] + } +} +``` + +### Invert score impact + +By default, higher values lead to higher scores. 
If you want lower values to yield higher scores (e.g., lower prices are more relevant), set `positive_score_impact` to `false` during index creation: + +```json +PUT /products_new +{ + "mappings": { + "properties": { + "popularity": { + "type": "rank_feature", + "positive_score_impact": false + } + } + } +} +``` +{% include copy-curl.html %} From 4595b4529cc38a832521643b9cdbf06d54a6baab Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Tue, 8 Jul 2025 12:07:50 +0100 Subject: [PATCH 2/5] addressing PR comments Signed-off-by: Anton Rubin --- _query-dsl/specialized/rank-feature.md | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/_query-dsl/specialized/rank-feature.md b/_query-dsl/specialized/rank-feature.md index 73c13f4460a..b11fb63d6d9 100644 --- a/_query-dsl/specialized/rank-feature.md +++ b/_query-dsl/specialized/rank-feature.md @@ -11,7 +11,7 @@ Use the `rank_feature` query to boost document scores based on numeric values in The `rank_feature` query expects the target field to be mapped as a [`rank_feature` field type]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/rank/). This enables internally optimized scoring for fast and efficient boosting. -The score impact depends on the field value and the optional `saturation`, `log` or `sigmoid` function used. +The score impact depends on the field value and the optional `saturation`, `log` or `sigmoid` function used. These functions are applied dynamically at query time to compute the final document score; they do not alter or store any values in the document itself. ## Parameters @@ -192,9 +192,9 @@ This ranks all documents matching "headphones" and boosts those with higher popu ## Boost parameter -The `boost` parameter allows you to scale the score contribution of the rank_feature clause. It's especially useful in compound queries such as bool, where you want to control the influence of a feature relative to other conditions. 
+The `boost` parameter allows you to scale the score contribution of the rank_feature clause. It’s especially useful in compound queries such as `bool`, where you want to control how much influence a numeric field (such as popularity, freshness, or relevance score) has on the final document ranking. -In the following example, the `bool` query matches documents with the term "headphones" in the `title`, and boosts more popular results with a `rank_feature` clause using a `boost` of `2.0`: +In the following example, the bool query matches documents with the term "headphones" in the `title`, and boosts more popular results using a `rank_feature` clause with a `boost` of `2.0`: ```json POST /products/_search @@ -226,7 +226,7 @@ By default, the `rank_feature` query uses a `saturation` function with a `pivot` ### Saturation function -The `saturation` function is the default scoring method used in `rank_feature` queries. It assigns higher scores to documents with larger feature values, but the increase in score becomes more gradual as the value exceeds a specified pivot. This is useful when you want to give diminishing returns to very large values, for example, boosting `popularity` while avoiding over-rewarding extremely high numbers. The formulae for calculating score is: `value of the rank_feature field / (value of the rank_feature field + pivot)`. The produced score is always between `0` and `1`. +The `saturation` function is the default scoring method used in `rank_feature` queries. It assigns higher scores to documents with larger feature values, but the increase in score becomes more gradual as the value exceeds a specified pivot. This is useful when you want to give diminishing returns to very large values, for example, boosting `popularity` while avoiding over-rewarding extremely high numbers. The formulae for calculating score is: `value of the rank_feature field / (value of the rank_feature field + pivot)`. The produced score is always between `0` and `1`. 
If the pivot is not provided, approximate geometric mean of all `rank_feature` values in the index is used. See following example using `saturation` with `pivot` configured to `50`: ```json POST /products/_search @@ -323,8 +323,6 @@ The `pivot` defines the point at which scoring growth slows down. Values higher } ``` -If the pivot is not provided, approximate geometric mean of all rank_feature values in the index is used. - ### Log function The log function is helpful when the range of values in your `rank_feature` field varies significantly. It applies a logarithmic scale to the `score`, which reduces the effect of extremely high values and helps normalize scoring across wide value distributions. This is especially useful when a small difference between low values should be more impactful than a large difference between high values. The score is derived using formulae: `log(scaling_factor + rank_feature field)`, see following example: @@ -426,7 +424,7 @@ In the example dataset, the `popularity` field ranges from `1` to `500`. The `lo ### Sigmoid function -The `sigmoid` function provides a smooth, S-shaped scoring curve which is especially useful when you want to control the steepness and midpoint of the scoring impact. The score is derived using formulae: `rank feature field value^exp / (rank feature field value^exp + pivot^exp)`, see following example: +The `sigmoid` function provides a smooth, S-shaped scoring curve which is especially useful when you want to control the steepness and midpoint of the scoring impact. The score is derived using formulae: `rank feature field value^exp / (rank feature field value^exp + pivot^exp)`, see following example of a query using `sigmoid` function with configured `pivot` and `exponent`: ```json POST /products/_search @@ -529,7 +527,7 @@ The sigmoid function smoothly boosts scores around the `pivot` (`50` in this cas ### Invert score impact -By default, higher values lead to higher scores. 
If you want lower values to yield higher scores (e.g., lower prices are more relevant), set `positive_score_impact` to `false` during index creation: +By default, higher values lead to higher scores. If you want lower values to yield higher scores (for example, lower prices are more relevant), set `positive_score_impact` to `false` during index creation: ```json PUT /products_new From 1c38ab99fd2fe5ca7c0c919651a7e5eb38ec81a2 Mon Sep 17 00:00:00 2001 From: AntonEliatra Date: Wed, 9 Jul 2025 17:39:37 +0100 Subject: [PATCH 3/5] Apply suggestions from code review Co-authored-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Signed-off-by: AntonEliatra --- _query-dsl/specialized/rank-feature.md | 34 +++++++++++++------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/_query-dsl/specialized/rank-feature.md b/_query-dsl/specialized/rank-feature.md index b11fb63d6d9..267b7bcf950 100644 --- a/_query-dsl/specialized/rank-feature.md +++ b/_query-dsl/specialized/rank-feature.md @@ -7,7 +7,7 @@ nav_order: 75 # Rank feature -Use the `rank_feature` query to boost document scores based on numeric values in the document, such as relevance scores, popularity, or freshness. This query is ideal if you want to fine-tune relevance ranking using numerical features. Unlike [full-text queries]({{site.url}}{{site.baseurl}}/query-dsl/full-text/index/), `rank_feature` focuses solely on a numeric signal, and is most effective when combined with other queries in a compound query like `bool`. +Use the `rank_feature` query to boost document scores based on numeric values in the document, such as relevance scores, popularity, or freshness. This query is ideal if you want to fine-tune relevance ranking using numerical features. Unlike [full-text queries]({{site.url}}{{site.baseurl}}/query-dsl/full-text/index/), `rank_feature` focuses solely on a numeric signal; it is most effective when combined with other queries in a compound query like `bool`. 
The `rank_feature` query expects the target field to be mapped as a [`rank_feature` field type]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/rank/). This enables internally optimized scoring for fast and efficient boosting. @@ -15,14 +15,16 @@ The score impact depends on the field value and the optional `saturation`, `log` ## Parameters +The `rank_feature` query supports the following parameters. + | Parameter | Required/Optional | Description | | ----------------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------ | | `field` | Required | A `rank_feature` or `rank_features` field that contributes to document scoring.| | `boost` | Optional | A multiplier applied to the score. Default is `1.0`. Values between 0 and 1 reduce the score, values above 1 amplify it. | -| `saturation` | Optional | Applies a saturation function to the feature value. Boost grows with value but levels off beyond the `pivot`. (Default function if no other function is provided)| +| `saturation` | Optional | Applies a saturation function to the feature value. Boost grows with value but levels off beyond the `pivot`. Default function if no other function is provided. | | `log` | Optional | Uses a logarithmic scoring function based on the field value. Best for large ranges of values.| | `sigmoid` | Optional | Applies a sigmoid (S-shaped) curve to score impact, controlled by `pivot` and `exponent`.| -| `positive_score_impact` | Optional | When `false`, lower values score higher. Useful for features like price where smaller is better. Defined as part of the mapping. (Default is `true`)| +| `positive_score_impact` | Optional | When `false`, lower values score higher. Useful for features like price, for which smaller is better. Defined as part of the mapping. Default is `true`. | Only one function out of `saturation`, `log`, or `sigmoid` may be used at a time. 
{: .note} @@ -83,7 +85,7 @@ POST /products/_search ``` {% include copy-curl.html %} -This query alone does not perform filtering, rather it scores all documents based on the value of `popularity`. Higher values yield higher scores: +This query alone does not perform filtering. Rather, it scores all documents based on the value of `popularity`. Higher values yield higher scores: ```json { @@ -165,7 +167,7 @@ This query alone does not perform filtering, rather it scores all documents base ## Combine with full-text search -To filter relevant results and boost them based on popularity use the following request: +To filter relevant results and boost them based on popularity, use the following request. This query ranks all documents matching "headphones" and boosts those with higher popularity: ```json POST /products/_search @@ -188,13 +190,12 @@ POST /products/_search ``` {% include copy-curl.html %} -This ranks all documents matching "headphones" and boosts those with higher popularity. ## Boost parameter The `boost` parameter allows you to scale the score contribution of the rank_feature clause. It’s especially useful in compound queries such as `bool`, where you want to control how much influence a numeric field (such as popularity, freshness, or relevance score) has on the final document ranking. -In the following example, the bool query matches documents with the term "headphones" in the `title`, and boosts more popular results using a `rank_feature` clause with a `boost` of `2.0`: +In the following example, the `bool` query matches documents with the term "headphones" in the `title` and boosts more popular results using a `rank_feature` clause with a `boost` of `2.0`. This doubles the contribution of the `rank_feature` score in the overall document score: ```json POST /products/_search @@ -218,15 +219,16 @@ POST /products/_search ``` {% include copy-curl.html %} -This will double the contribution of the rank_feature score in the overall document score. 
A `boost` less than `1.0` would down-weight its influence. ## Configure score function -By default, the `rank_feature` query uses a `saturation` function with a `pivot` value derived from the field. You can explicitly control this with the `saturation`, `log` or `sigmoid` functions. +By default, the `rank_feature` query uses a `saturation` function with a `pivot` value derived from the field. You can explicitly set the function to `saturation`, `log` or `sigmoid`. ### Saturation function -The `saturation` function is the default scoring method used in `rank_feature` queries. It assigns higher scores to documents with larger feature values, but the increase in score becomes more gradual as the value exceeds a specified pivot. This is useful when you want to give diminishing returns to very large values, for example, boosting `popularity` while avoiding over-rewarding extremely high numbers. The formulae for calculating score is: `value of the rank_feature field / (value of the rank_feature field + pivot)`. The produced score is always between `0` and `1`. If the pivot is not provided, approximate geometric mean of all `rank_feature` values in the index is used. See following example using `saturation` with `pivot` configured to `50`: +The `saturation` function is the default scoring method used in `rank_feature` queries. It assigns higher scores to documents with larger feature values, but the increase in score becomes more gradual as the value exceeds a specified pivot. This is useful when you want to give diminishing returns to very large values, for example, boosting `popularity` while avoiding over-rewarding extremely high numbers. The formula for calculating score is: `value of the rank_feature field / (value of the rank_feature field + pivot)`. The produced score is always between `0` and `1`. If the pivot is not provided, approximate geometric mean of all `rank_feature` values in the index is used. 
+ +The following example uses `saturation` with a `pivot` of `50`: ```json POST /products/_search @@ -243,7 +245,7 @@ POST /products/_search ``` {% include copy-curl.html %} -The `pivot` defines the point at which scoring growth slows down. Values higher than `pivot` still increase the score, but with diminishing returns, as can be seen in the returned hits: +The `pivot` defines the point at which the scoring growth slows down. Values higher than `pivot` still increase the score, but with diminishing returns, as can be seen in the returned hits: ```json { @@ -325,7 +327,7 @@ The `pivot` defines the point at which scoring growth slows down. Values higher ### Log function -The log function is helpful when the range of values in your `rank_feature` field varies significantly. It applies a logarithmic scale to the `score`, which reduces the effect of extremely high values and helps normalize scoring across wide value distributions. This is especially useful when a small difference between low values should be more impactful than a large difference between high values. The score is derived using formulae: `log(scaling_factor + rank_feature field)`, see following example: +The log function is helpful when the range of values in your `rank_feature` field varies significantly. It applies a logarithmic scale to the `score`, which reduces the effect of extremely high values and helps normalize scoring across wide value distributions. This is especially useful when a small difference between low values should be more impactful than a large difference between high values. The score is calculated using the formula `log(scaling_factor + rank_feature field)`. The following example uses a `scaling_factor` of 2: ```json POST /products/_search @@ -342,7 +344,7 @@ POST /products/_search ``` {% include copy-curl.html %} -In the example dataset, the `popularity` field ranges from `1` to `500`. 
The `log` function compresses the `score` contribution from large values like `250` and `500`, while still allowing documents with `10` or `25` to have meaningful scores. This is unlike `saturation`, where documents above the pivot rapidly approach the same maximum score.” +In the example dataset, the `popularity` field ranges from `1` to `500`. The `log` function compresses the `score` contribution from large values like `250` and `500`, while still allowing documents with `10` or `25` to have meaningful scores. In contrast, if you applied the `saturation` function, documents above the pivot would rapidly approach the same maximum score: ```json { @@ -424,7 +426,7 @@ In the example dataset, the `popularity` field ranges from `1` to `500`. The `lo ### Sigmoid function -The `sigmoid` function provides a smooth, S-shaped scoring curve which is especially useful when you want to control the steepness and midpoint of the scoring impact. The score is derived using formulae: `rank feature field value^exp / (rank feature field value^exp + pivot^exp)`, see following example of a query using `sigmoid` function with configured `pivot` and `exponent`: +The `sigmoid` function provides a smooth, S-shaped scoring curve, which is especially useful when you want to control the steepness and midpoint of the scoring impact. The score is derived using the formula `rank feature field value^exp / (rank feature field value^exp + pivot^exp)`. The following example uses a `sigmoid` function with a configured `pivot` and `exponent`. The `pivot` defines the value at which the score is 0.5. The `exponent` controls how steep the curve is. Higher values result in a sharper transition around the pivot: ```json POST /products/_search @@ -442,10 +444,8 @@ POST /products/_search ``` {% include copy-curl.html %} -* `pivot` defines the value at which the score is 0.5. -* `exponent` controls how steep the curve is. Lower values result in a sharper transition around the pivot. 
-The sigmoid function smoothly boosts scores around the `pivot` (`50` in this case), giving moderate preference to values near the pivot while flattening out both high and low extremes: +The sigmoid function smoothly boosts scores around the `pivot` (in this example, `50`), giving moderate preference to values near the pivot while flattening out both high and low extremes: ```json { From af59cfea233ee830e642ce164818ad0711c99308 Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Wed, 9 Jul 2025 18:04:21 +0100 Subject: [PATCH 4/5] addressing the PR comments Signed-off-by: Anton Rubin --- _query-dsl/specialized/rank-feature.md | 41 +++++++++++++------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/_query-dsl/specialized/rank-feature.md b/_query-dsl/specialized/rank-feature.md index 267b7bcf950..0ea48edb18e 100644 --- a/_query-dsl/specialized/rank-feature.md +++ b/_query-dsl/specialized/rank-feature.md @@ -17,19 +17,20 @@ The score impact depends on the field value and the optional `saturation`, `log` The `rank_feature` query supports the following parameters. -| Parameter | Required/Optional | Description | -| ----------------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------ | -| `field` | Required | A `rank_feature` or `rank_features` field that contributes to document scoring.| -| `boost` | Optional | A multiplier applied to the score. Default is `1.0`. Values between 0 and 1 reduce the score, values above 1 amplify it. | -| `saturation` | Optional | Applies a saturation function to the feature value. Boost grows with value but levels off beyond the `pivot`. Default function if no other function is provided. | -| `log` | Optional | Uses a logarithmic scoring function based on the field value. 
Best for large ranges of values.| -| `sigmoid` | Optional | Applies a sigmoid (S-shaped) curve to score impact, controlled by `pivot` and `exponent`.| -| `positive_score_impact` | Optional | When `false`, lower values score higher. Useful for features like price, for which smaller is better. Defined as part of the mapping. Default is `true`. | +| Parameter | Data type | Required/Optional | Description | +| ----------------------- | --------- | ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `field` | String | Required | A `rank_feature` or `rank_features` field that contributes to document scoring. | +| `boost` | Float | Optional | A multiplier applied to the score. Default is `1.0`. Values between 0 and 1 reduce the score, values above 1 amplify it. | +| `saturation` | Object | Optional | Applies a saturation function to the feature value. Boost grows with value but levels off beyond the `pivot`. Default function if no other function is provided. Only one function out of `saturation`, `log`, or `sigmoid` may be used at a time.| +| `log` | Object | Optional | Uses a logarithmic scoring function based on the field value. Best for large ranges of values. Only one function out of `saturation`, `log`, or `sigmoid` may be used at a time. | +| `sigmoid` | Object | Optional | Applies a sigmoid (S-shaped) curve to score impact, controlled by `pivot` and `exponent`. Only one function out of `saturation`, `log`, or `sigmoid` may be used at a time. | +| `positive_score_impact` | Boolean | Optional | When `false`, lower values score higher. Useful for features like price, for which smaller is better. Defined as part of the mapping. Default is `true`. | -Only one function out of `saturation`, `log`, or `sigmoid` may be used at a time. 
-{: .note} +## Example -## Create an index with rank feature field +The following examples demonstrate how to define and use a `rank_feature` field to influence document scoring. + +### Create an index with rank feature field Define an index with a `rank_feature` field to represent a signal like `popularity`: @@ -46,7 +47,7 @@ PUT /products ``` {% include copy-curl.html %} -## Index example documents +### Index example documents Add sample products with varying popularity values: @@ -69,7 +70,7 @@ POST /products/_bulk ``` {% include copy-curl.html %} -## Basic rank feature query +### Basic rank feature query You can boost results based on the `popularity` score using `rank_feature`: @@ -165,7 +166,7 @@ This query alone does not perform filtering. Rather, it scores all documents bas } ``` -## Combine with full-text search +### Combine with full-text search To filter relevant results and boost them based on popularity, use the following request. This query ranks all documents matching "headphones" and boosts those with higher popularity: @@ -191,7 +192,7 @@ POST /products/_search {% include copy-curl.html %} -## Boost parameter +### Boost parameter The `boost` parameter allows you to scale the score contribution of the rank_feature clause. It’s especially useful in compound queries such as `bool`, where you want to control how much influence a numeric field (such as popularity, freshness, or relevance score) has on the final document ranking. @@ -220,11 +221,11 @@ POST /products/_search {% include copy-curl.html %} -## Configure score function +### Configure score function By default, the `rank_feature` query uses a `saturation` function with a `pivot` value derived from the field. You can explicitly set the function to `saturation`, `log` or `sigmoid`. -### Saturation function +#### Saturation function The `saturation` function is the default scoring method used in `rank_feature` queries. 
It assigns higher scores to documents with larger feature values, but the increase in score becomes more gradual as the value exceeds a specified pivot. This is useful when you want to give diminishing returns to very large values, for example, boosting `popularity` while avoiding over-rewarding extremely high numbers. The formula for calculating score is: `value of the rank_feature field / (value of the rank_feature field + pivot)`. The produced score is always between `0` and `1`. If the pivot is not provided, approximate geometric mean of all `rank_feature` values in the index is used. @@ -325,7 +326,7 @@ The `pivot` defines the point at which the scoring growth slows down. Values hig } ``` -### Log function +#### Log function The log function is helpful when the range of values in your `rank_feature` field varies significantly. It applies a logarithmic scale to the `score`, which reduces the effect of extremely high values and helps normalize scoring across wide value distributions. This is especially useful when a small difference between low values should be more impactful than a large difference between high values. The score is calculated using the formula `log(scaling_factor + rank_feature field)`. The following example uses a `scaling_factor` of 2: @@ -424,7 +425,7 @@ In the example dataset, the `popularity` field ranges from `1` to `500`. The `lo } ``` -### Sigmoid function +#### Sigmoid function The `sigmoid` function provides a smooth, S-shaped scoring curve, which is especially useful when you want to control the steepness and midpoint of the scoring impact. The score is derived using the formula `rank feature field value^exp / (rank feature field value^exp + pivot^exp)`. The following example uses a `sigmoid` function with a configured `pivot` and `exponent`. The `pivot` defines the value at which the score is 0.5. The `exponent` controls how steep the curve is. 
Lower values result in a sharper transition around the pivot: @@ -525,7 +526,7 @@ The sigmoid function smoothly boosts scores around the `pivot` (in this example, } ``` -### Invert score impact +#### Invert score impact By default, higher values lead to higher scores. If you want lower values to yield higher scores (for example, lower prices are more relevant), set `positive_score_impact` to `false` during index creation: From 4ad6e4212f58309490c8e41613f3eba0b0492a67 Mon Sep 17 00:00:00 2001 From: Nathan Bower Date: Thu, 10 Jul 2025 11:22:19 -0400 Subject: [PATCH 5/5] Apply suggestions from code review Signed-off-by: Nathan Bower --- _query-dsl/specialized/rank-feature.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/_query-dsl/specialized/rank-feature.md b/_query-dsl/specialized/rank-feature.md index 0ea48edb18e..9adda9c7132 100644 --- a/_query-dsl/specialized/rank-feature.md +++ b/_query-dsl/specialized/rank-feature.md @@ -7,11 +7,11 @@ nav_order: 75 # Rank feature -Use the `rank_feature` query to boost document scores based on numeric values in the document, such as relevance scores, popularity, or freshness. This query is ideal if you want to fine-tune relevance ranking using numerical features. Unlike [full-text queries]({{site.url}}{{site.baseurl}}/query-dsl/full-text/index/), `rank_feature` focuses solely on a numeric signal; it is most effective when combined with other queries in a compound query like `bool`. +Use the `rank_feature` query to boost document scores based on numeric values in a document, such as relevance scores, popularity, or freshness. This query is ideal if you want to fine-tune relevance ranking using numerical features. Unlike [full-text queries]({{site.url}}{{site.baseurl}}/query-dsl/full-text/index/), `rank_feature` focuses solely on a numeric signal; it is most effective when combined with other queries in a compound query like `bool`. 
The `rank_feature` query expects the target field to be mapped as a [`rank_feature` field type]({{site.url}}{{site.baseurl}}/field-types/supported-field-types/rank/). This enables internally optimized scoring for fast and efficient boosting. -The score impact depends on the field value and the optional `saturation`, `log` or `sigmoid` function used. These functions are applied dynamically at query time to compute the final document score, they do not alter or store any values in the document itself. +The score impact depends on the field value and the optional `saturation`, `log`, or `sigmoid` function used. These functions are applied dynamically at query time to compute the final document score; they do not alter or store any values in the document itself. ## Parameters @@ -20,7 +20,7 @@ The `rank_feature` query supports the following parameters. | Parameter | Data type | Required/Optional | Description | | ----------------------- | --------- | ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `field` | String | Required | A `rank_feature` or `rank_features` field that contributes to document scoring. | -| `boost` | Float | Optional | A multiplier applied to the score. Default is `1.0`. Values between 0 and 1 reduce the score, values above 1 amplify it. | +| `boost` | Float | Optional | A multiplier applied to the score. Default is `1.0`. Values between 0 and 1 reduce the score; values above 1 amplify it. | | `saturation` | Object | Optional | Applies a saturation function to the feature value. Boost grows with value but levels off beyond the `pivot`. Default function if no other function is provided. Only one function out of `saturation`, `log`, or `sigmoid` may be used at a time.| | `log` | Object | Optional | Uses a logarithmic scoring function based on the field value. Best for large ranges of values. 
Only one function out of `saturation`, `log`, or `sigmoid` may be used at a time. | | `sigmoid` | Object | Optional | Applies a sigmoid (S-shaped) curve to score impact, controlled by `pivot` and `exponent`. Only one function out of `saturation`, `log`, or `sigmoid` may be used at a time. | @@ -30,7 +30,7 @@ The `rank_feature` query supports the following parameters. The following examples demonstrate how to define and use a `rank_feature` field to influence document scoring. -### Create an index with rank feature field +### Create an index with a rank feature field Define an index with a `rank_feature` field to represent a signal like `popularity`: @@ -194,9 +194,9 @@ POST /products/_search ### Boost parameter -The `boost` parameter allows you to scale the score contribution of the rank_feature clause. It’s especially useful in compound queries such as `bool`, where you want to control how much influence a numeric field (such as popularity, freshness, or relevance score) has on the final document ranking. +The `boost` parameter allows you to scale the score contribution of the `rank_feature` clause. It's especially useful in compound queries such as `bool`, where you want to control how much influence a numeric field (such as popularity, freshness, or relevance score) has on the final document ranking. -In the following example, the `bool` query matches documents with the term "headphones" in the `title` and boosts more popular results using a `rank_feature` clause with a `boost` of `2.0`. This doubles the contribution of the `rank_feature` score in the overall document score: +In the following example, the `bool` query matches documents with the term "headphones" in the `title` and boosts more popular results using a `rank_feature` clause with a `boost` of `2.0`.
This doubles the contribution of the `rank_feature` score to the overall document score: ```json POST /products/_search @@ -227,7 +227,7 @@ By default, the `rank_feature` query uses a `saturation` function with a `pivot` #### Saturation function -The `saturation` function is the default scoring method used in `rank_feature` queries. It assigns higher scores to documents with larger feature values, but the increase in score becomes more gradual as the value exceeds a specified pivot. This is useful when you want to give diminishing returns to very large values, for example, boosting `popularity` while avoiding over-rewarding extremely high numbers. The formula for calculating score is: `value of the rank_feature field / (value of the rank_feature field + pivot)`. The produced score is always between `0` and `1`. If the pivot is not provided, approximate geometric mean of all `rank_feature` values in the index is used. +The `saturation` function is the default scoring method used in `rank_feature` queries. It assigns higher scores to documents with larger feature values, but the increase in score becomes more gradual as the value exceeds a specified `pivot`. This is useful when you want to give diminishing returns to very large values, for example, boosting `popularity` while avoiding over-rewarding extremely high numbers. The formula for calculating score is `value of the rank_feature field / (value of the rank_feature field + pivot)`. The produced score is always between `0` and `1`. If the `pivot` is not provided, the approximate geometric mean of all `rank_feature` values in the index is used. The following example uses `saturation` with a `pivot` of `50`: @@ -328,7 +328,7 @@ The `pivot` defines the point at which the scoring growth slows down. Values hig #### Log function -The log function is helpful when the range of values in your `rank_feature` field varies significantly. 
It applies a logarithmic scale to the `score`, which reduces the effect of extremely high values and helps normalize scoring across wide value distributions. This is especially useful when a small difference between low values should be more impactful than a large difference between high values. The score is calculated using the formula `log(scaling_factor + rank_feature field)`. The following example uses a `scaling_factor` of 2: +The `log` function is helpful when the `rank_feature` field contains a significant range of values. It applies a logarithmic scale to the `score`, which reduces the effect of extremely high values and helps normalize scoring across wide value distributions. This is especially useful when a small difference between low values should be more impactful than a large difference between high values. The score is calculated using the formula `log(scaling_factor + rank_feature field)`. The following example uses a `scaling_factor` of `2`: ```json POST /products/_search @@ -345,7 +345,7 @@ POST /products/_search ``` {% include copy-curl.html %} -In the example dataset, the `popularity` field ranges from `1` to `500`. The `log` function compresses the `score` contribution from large values like `250` and `500`, while still allowing documents with `10` or `25` to have meaningful scores. In contrast, if you applied the `saturation` function, documents above the pivot would rapidly approach the same maximum score: +In the example dataset, the `popularity` field ranges from `1` to `500`. The `log` function compresses the `score` contribution from large values like `250` and `500` while still allowing documents with `10` or `25` to have meaningful scores. In contrast, if you applied the `saturation` function, documents above the `pivot` would rapidly approach the same maximum score: ```json { @@ -427,7 +427,7 @@ In the example dataset, the `popularity` field ranges from `1` to `500`. 
The `lo #### Sigmoid function -The `sigmoid` function provides a smooth, S-shaped scoring curve, which is especially useful when you want to control the steepness and midpoint of the scoring impact. The score is derived using the formula `rank feature field value^exp / (rank feature field value^exp + pivot^exp)`. The following example uses a `sigmoid` function with a configured `pivot` and `exponent`. The `pivot` defines the value at which the score is 0.5. The `exponent` controls how steep the curve is. Lower values result in a sharper transition around the pivot: +The `sigmoid` function provides a smooth, S-shaped scoring curve, which is especially useful when you want to control the steepness and midpoint of the scoring impact. The score is derived using the formula `rank feature field value^exp / (rank feature field value^exp + pivot^exp)`. The following example uses a `sigmoid` function with a configured `pivot` and `exponent`. The `pivot` defines the value at which the score is 0.5. The `exponent` controls how steep the curve is. Higher values result in a sharper transition around the `pivot`: ```json POST /products/_search @@ -446,7 +446,7 @@ POST /products/_search {% include copy-curl.html %} -The sigmoid function smoothly boosts scores around the `pivot` (in this example,`50`), giving moderate preference to values near the pivot while flattening out both high and low extremes: +The `sigmoid` function smoothly boosts scores around the `pivot` (in this example, `50`), giving moderate preference to values near the `pivot` while flattening out both high and low extremes: ```json {