elastic · jimczi · Jul 25, 2025 · Jul 25, 2025 · Jul 25, 2025 · Jul 25, 2025
diff --git a/docs/changelog/131907.yaml b/docs/changelog/131907.yaml
@@ -0,0 +1,26 @@
+pr: 131907
+summary: Enable `exclude_source_vectors` by default for new indices
+area: Vector Search
+type: breaking
+issues: []
+breaking:
+  title: Enable `exclude_source_vectors` by default for new indices
+  area: Search
+  details: |-
+    The `exclude_source_vectors` setting is now enabled by default for newly created indices.
+    This means that vector fields (e.g., `dense_vector`) are no longer stored in the `_source` field
+    by default, although they remain fully accessible through search and retrieval operations.
+
+    Instead of being persisted in `_source`, vectors are now rehydrated on demand from the underlying
+    index structures when needed. This reduces index size and improves performance for typical vector
+    search workloads where the original vector values do not need to be part of the `_source`.
+
+    If your use case requires vector fields to be stored in `_source`, you can disable this behavior by
+    setting `index.mapping.exclude_source_vectors: false` at index creation time.
+  impact: |-
+    Vector fields will no longer be stored in `_source` by default for new indices. Applications or tools
+    that expect to see vector fields in `_source` (for example, for raw document inspection)
+    may need to be updated or configured to explicitly retain vectors using `index.mapping.exclude_source_vectors: false`.
+
+    Retrieval of vector fields via search or the `_source` API remains fully supported.
+  notable: true
diff --git a/docs/reference/elasticsearch/mapping-reference/dense-vector.md b/docs/reference/elasticsearch/mapping-reference/dense-vector.md
@@ -102,6 +102,81 @@ PUT my-index-2
 
 {{es}} uses the [HNSW algorithm](https://arxiv.org/abs/1603.09320) to support efficient kNN search. Like most kNN algorithms, HNSW is an approximate method that sacrifices result accuracy for improved speed.
 
+## Accessing `dense_vector` fields in search responses
+```{applies_to}
+stack: ga 9.2
+serverless: ga
+```
+
+By default, `dense_vector` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs.
+This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output.
+
+To retrieve vector values explicitly, you can use:
+
+* The `fields` option to request specific vector fields directly:
+
+```console
+POST my-index-2/_search
+{
+  "fields": ["my_vector"]
+}
+```
+
+* The `_source.exclude_vectors` flag to re-enable vector inclusion in `_source` responses:
+
+```console
+POST my-index-2/_search
+{
+  "_source": {
+    "exclude_vectors": false
+  }
+}
+```
+
+### Storage behavior and `_source`
+
+By default, `dense_vector` fields are **not stored in `_source`** on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`.
+This setting is enabled by default for newly created indices and can only be set at index creation time.
+
+When enabled:
+
+* `dense_vector` fields are removed from `_source` and the rest of the `_source` is stored as usual.
+* If a request includes `_source` and vector values are needed (e.g., during recovery or reindex), the vectors are rehydrated from their internal format.
+
+This setting is compatible with synthetic `_source`, where the entire `_source` document is reconstructed from columnar storage. In full synthetic mode, no `_source` is stored on disk, and all fields — including vectors — are rebuilt when needed.
+
+### Rehydration and precision
+
+When vector values are rehydrated (e.g., for reindex, recovery, or explicit `_source` requests), they are restored from their internal format. Internally, vectors are stored at float precision, so if they were originally indexed as higher-precision types (e.g., `double` or `long`), the rehydrated values will have reduced precision. This lossy representation is intended to save space while preserving search quality.
+
+### Storing original vectors in `_source`
+
+If you want to preserve the original vector values exactly as they were provided, you can re-enable vector storage in `_source`:
+
+```console
+PUT my-index-include-vectors
+{
+  "settings": {
+    "index.mapping.exclude_source_vectors": false
+  },
+  "mappings": {
+    "properties": {
+      "my_vector": {
+        "type": "dense_vector"
+      }
+    }
+  }
+}
+```
+
+When this setting is disabled:
+
+* `dense_vector` fields are stored as part of the `_source`, exactly as indexed.
+* The index will store both the original `_source` value and the internal representation used for vector search, resulting in increased storage usage.
+* Vectors are once again returned in `_source` by default in all relevant APIs, with no need to use `exclude_vectors` or `fields`.
+
+This configuration is appropriate when full source fidelity is required, such as for auditing or round-tripping exact input values.
+
 ## Automatically quantize vectors for kNN search [dense-vector-quantization]
 
 The `dense_vector` type supports quantization to reduce the memory footprint required when [searching](docs-content://solutions/search/vector/knn.md#approximate-knn) `float` vectors. The three following quantization strategies are supported:
@@ -266,16 +341,16 @@ $$$dense-vector-index-options$$$
 `type`
 :   (Required, string) The type of kNN algorithm to use. Can be either any of:
     * `hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) for scalable approximate kNN search. This supports all `element_type` values.
-    * `int8_hnsw` - The default index type for some float vectors: 
-        
-      * {applies_to}`stack: ga 9.1` Default for float vectors with less than 384 dimensions. 
+    * `int8_hnsw` - The default index type for some float vectors:
+
+      * {applies_to}`stack: ga 9.1` Default for float vectors with less than 384 dimensions.
       * {applies_to}`stack: ga 9.0` Default for float all vectors.
-      
+
       This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatically scalar quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 4x at the cost of some accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization).
     * `int4_hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatically scalar quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 8x at the cost of some accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization).
     * `bbq_hnsw` - This utilizes the [HNSW algorithm](https://arxiv.org/abs/1603.09320) in addition to automatically binary quantization for scalable approximate kNN search with `element_type` of `float`. This can reduce the memory footprint by 32x at the cost of accuracy. See [Automatically quantize vectors for kNN search](#dense-vector-quantization).
-        
-      {applies_to}`stack: ga 9.1` `bbq_hnsw` is the default index type for float vectors with greater than or equal to 384 dimensions. 
+
+      {applies_to}`stack: ga 9.1` `bbq_hnsw` is the default index type for float vectors with greater than or equal to 384 dimensions.
     * `flat` - This utilizes a brute-force search algorithm for exact kNN search. This supports all `element_type` values.
     * `int8_flat` - This utilizes a brute-force search algorithm in addition to automatically scalar quantization. Only supports `element_type` of `float`.
     * `int4_flat` - This utilizes a brute-force search algorithm in addition to automatically half-byte scalar quantization. Only supports `element_type` of `float`.
@@ -295,8 +370,8 @@ $$$dense-vector-index-options$$$
 :   (Optional, object) An optional section that configures automatic vector rescoring on knn queries for the given field. Only applicable to quantized index types.
 :::::{dropdown} Properties of rescore_vector
 `oversample`
-:   (required, float) The amount to oversample the search results by. This value should be one of the following: 
-    * Greater than `1.0` and less than `10.0` 
+:   (required, float) The amount to oversample the search results by. This value should be one of the following:
+    * Greater than `1.0` and less than `10.0`
     * Exactly `0` to indicate no oversampling and rescoring should occur {applies_to}`stack: ga 9.1`
     :   The higher the value, the more vectors will be gathered and rescored with the raw values per shard.
     :   In case a knn query specifies a `rescore_vector` parameter, the query `rescore_vector` parameter will be used instead.

diff --git a/docs/reference/elasticsearch/mapping-reference/rank-vectors.md b/docs/reference/elasticsearch/mapping-reference/rank-vectors.md
@@ -108,11 +108,81 @@ $$$rank-vectors-element-type$$$
 `dims`
 :   (Optional, integer) Number of vector dimensions. Can’t exceed `4096`. If `dims` is not specified, it will be set to the length of the first vector added to the field.
 
+## Accessing `rank_vectors` fields in search responses
+```{applies_to}
+stack: ga 9.2
+serverless: ga
+```
+
+By default, `rank_vectors` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs.
+This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output.
+
+To retrieve vector values explicitly, you can use:
+
+* The `fields` option to request specific vector fields directly:
+
+```console
+POST my-index-2/_search
+{
+  "fields": ["my_vector"]
+}
+```
+
+* The `_source.exclude_vectors` flag to re-enable vector inclusion in `_source` responses:
+
+```console
+POST my-index-2/_search
+{
+  "_source": {
+    "exclude_vectors": false
+  }
+}
+```
+
+### Storage behavior and `_source`
+
+By default, `rank_vectors` fields are not stored in `_source` on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`.
+This setting is enabled by default for newly created indices and can only be set at index creation time.
+
+When enabled:
+
+* `rank_vectors` fields are removed from `_source` and the rest of the `_source` is stored as usual.
+* If a request includes `_source` and vector values are needed (e.g., during recovery or reindex), the vectors are rehydrated from their internal format.
+
+This setting is compatible with synthetic `_source`, where the entire `_source` document is reconstructed from columnar storage. In full synthetic mode, no `_source` is stored on disk, and all fields — including vectors — are rebuilt when needed.
+
+### Rehydration and precision
+
+When vector values are rehydrated (e.g., for reindex, recovery, or explicit `_source` requests), they are restored from their internal format. Internally, vectors are stored at float precision, so if they were originally indexed as higher-precision types (e.g., `double` or `long`), the rehydrated values will have reduced precision. This lossy representation is intended to save space while preserving search quality.
+
+### Storing original vectors in `_source`
+
+If you want to preserve the original vector values exactly as they were provided, you can re-enable vector storage in `_source`:
+
+```console
+PUT my-index-include-vectors
+{
+  "settings": {
+    "index.mapping.exclude_source_vectors": false
+  },
+  "mappings": {
+    "properties": {
+      "my_vector": {
+        "type": "rank_vectors",
+        "dims": 128
+      }
+    }
+  }
+}
+```
 
-## Synthetic `_source` [rank-vectors-synthetic-source]
+When this setting is disabled:
 
-`rank_vectors` fields support [synthetic `_source`](mapping-source-field.md#synthetic-source) .
+* `rank_vectors` fields are stored as part of the `_source`, exactly as indexed.
+* The index will store both the original `_source` value and the internal representation used for vector search, resulting in increased storage usage.
+* Vectors are once again returned in `_source` by default in all relevant APIs, with no need to use `exclude_vectors` or `fields`.
 
+This configuration is appropriate when full source fidelity is required, such as for auditing or round-tripping exact input values.
 
 ## Scoring with rank vectors [rank-vectors-scoring]
 

diff --git a/docs/reference/elasticsearch/mapping-reference/sparse-vector.md b/docs/reference/elasticsearch/mapping-reference/sparse-vector.md
@@ -57,12 +57,6 @@ See [semantic search with ELSER](docs-content://solutions/search/semantic-search
 
 The following parameters are accepted by `sparse_vector` fields:
 
-[store](/reference/elasticsearch/mapping-reference/mapping-store.md)
-:   Indicates whether the field value should be stored and retrievable independently of the [_source](/reference/elasticsearch/mapping-reference/mapping-source-field.md) field. Accepted values: true or false (default). The field’s data is stored using term vectors, a disk-efficient structure compared to the original JSON input. The input map can be retrieved during a search request via the [`fields` parameter](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#search-fields-param). To benefit from reduced disk usage, you must either:
-
-    * Exclude the field from [_source](/reference/elasticsearch/rest-apis/retrieve-selected-fields.md#source-filtering).
-    * Use [synthetic `_source`](/reference/elasticsearch/mapping-reference/mapping-source-field.md#synthetic-source).
-
 index_options {applies_to}`stack: ga 9.1`
 :   (Optional, object) You can set index options for your  `sparse_vector` field to determine if you should prune tokens, and the parameter configurations for the token pruning. If pruning options are not set in your [`sparse_vector` query](/reference/query-languages/query-dsl/query-dsl-sparse-vector-query.md), Elasticsearch will use the default options configured for the field, if any.
 
@@ -96,6 +90,82 @@ This ensures that:
 * The tokens that are kept are frequent enough and have significant scoring.
 * Very infrequent tokens that may not have as high of a score are removed.
 
+## Accessing `sparse_vector` fields in search responses
+```{applies_to}
+stack: ga 9.2
+serverless: ga
+```
+
+By default, `sparse_vector` fields are **not included in `_source`** in responses from the `_search`, `_msearch`, `_get`, and `_mget` APIs.
+This helps reduce response size and improve performance, especially in scenarios where vectors are used solely for similarity scoring and not required in the output.
+
+To retrieve vector values explicitly, you can use:
+
+* The `fields` option to request specific vector fields directly:
+
+```console
+POST my-index-2/_search
+{
+  "fields": ["my_vector"]
+}
+```
+
+* The `_source.exclude_vectors` flag to re-enable vector inclusion in `_source` responses:
+
+```console
+POST my-index-2/_search
+{
+  "_source": {
+    "exclude_vectors": false
+  }
+}
+```
+
+### Storage behavior and `_source`
+
+By default, `sparse_vector` fields are not stored in `_source` on disk. This is also controlled by the index setting `index.mapping.exclude_source_vectors`.
+This setting is enabled by default for newly created indices and can only be set at index creation time.
+
+When enabled:
+
+* `sparse_vector` fields are removed from `_source` and the rest of the `_source` is stored as usual.
+* If a request includes `_source` and vector values are needed (e.g., during recovery or reindex), the vectors are rehydrated from their internal format.
+
+This setting is compatible with synthetic `_source`, where the entire `_source` document is reconstructed from columnar storage. In full synthetic mode, no `_source` is stored on disk, and all fields — including vectors — are rebuilt when needed.
+
+### Rehydration and precision
+
+When vector values are rehydrated (e.g., for reindex, recovery, or explicit `_source` requests), they are restored from their internal format.
+Internally, vectors are stored as floats with only 9 significant bits of precision, so the rehydrated values will have reduced precision.
+This lossy representation is intended to save space while preserving search quality.
+
+### Storing original vectors in `_source`
+
+If you want to preserve the original vector values exactly as they were provided, you can re-enable vector storage in `_source`:
+
+```console
+PUT my-index-include-vectors
+{
+  "settings": {
+    "index.mapping.exclude_source_vectors": false
+  },
+  "mappings": {
+    "properties": {
+      "my_vector": {
+        "type": "sparse_vector"
+      }
+    }
+  }
+}
+```
+
+When this setting is disabled:
+
+* `sparse_vector` fields are stored as part of the `_source`, exactly as indexed.
+* The index will store both the original `_source` value and the internal representation used for vector search, resulting in increased storage usage.
+* Vectors are once again returned in `_source` by default in all relevant APIs, with no need to use `exclude_vectors` or `fields`.
+
+This configuration is appropriate when full source fidelity is required, such as for auditing or round-tripping exact input values.
 
 ## Multi-value sparse vectors [index-multi-value-sparse-vectors]
 

diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/ReindexBasicTests.java
@@ -23,7 +23,6 @@
 import java.util.Map;
 import java.util.stream.Collectors;
 
-import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS;
 import static org.elasticsearch.index.query.QueryBuilders.termQuery;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
@@ -182,14 +181,13 @@ public void testReindexFromComplexDateMathIndexName() throws Exception {
     }
 
     public void testReindexIncludeVectors() throws Exception {
-        assumeTrue("This test requires synthetic vectors to be enabled", SYNTHETIC_VECTORS);
         var resp1 = prepareCreate("test").setSettings(
-            Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build()
+            Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build()
         ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get();
         assertAcked(resp1);
 
         var resp2 = prepareCreate("test_reindex").setSettings(
-            Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build()
+            Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build()
         ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get();
         assertAcked(resp2);
 
@@ -237,5 +235,4 @@ public void testReindexIncludeVectors() throws Exception {
             searchResponse.decRef();
         }
     }
-
 }
diff --git a/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java b/modules/reindex/src/test/java/org/elasticsearch/reindex/UpdateByQueryBasicTests.java
@@ -24,7 +24,6 @@
 import java.util.Map;
 import java.util.stream.Collectors;
 
-import static org.elasticsearch.index.IndexSettings.SYNTHETIC_VECTORS;
 import static org.elasticsearch.index.query.QueryBuilders.termQuery;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
@@ -158,9 +157,8 @@ public void testMissingSources() {
     }
 
     public void testUpdateByQueryIncludeVectors() throws Exception {
-        assumeTrue("This test requires synthetic vectors to be enabled", SYNTHETIC_VECTORS);
         var resp1 = prepareCreate("test").setSettings(
-            Settings.builder().put(IndexSettings.INDEX_MAPPING_SOURCE_SYNTHETIC_VECTORS_SETTING.getKey(), true).build()
+            Settings.builder().put(IndexSettings.INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.getKey(), true).build()
         ).setMapping("foo", "type=dense_vector,similarity=l2_norm", "bar", "type=sparse_vector").get();
         assertAcked(resp1);