diff --git a/docs/overlays/elasticsearch-openapi-overlays.yaml b/docs/overlays/elasticsearch-openapi-overlays.yaml index 3c99fe1812..d83cc61e8b 100644 --- a/docs/overlays/elasticsearch-openapi-overlays.yaml +++ b/docs/overlays/elasticsearch-openapi-overlays.yaml @@ -618,14 +618,15 @@ actions: indicesLegacyPutTemplateRequestExample1: $ref: "../../specification/indices/put_template/examples/request/indicesPutTemplateRequestExample1.yaml" ## Examples for inference - - target: "$.components['requestBodies']['inference.stream_inference']" + - target: "$.paths['/_inference/chat_completion/{inference_id}/_stream']['post']" description: "Add example for inference stream request" update: - content: - application/json: - examples: - streamInferenceRequestExample1: - $ref: "../../specification/inference/stream_inference/examples/request/StreamInferenceRequestExample1.yaml" + requestBody: + content: + application/json: + examples: + streamInferenceRequestExample1: + $ref: "../../specification/inference/stream_completion/examples/request/StreamInferenceRequestExample1.yaml" ## Examples for ingest - target: "$.components['requestBodies']['simulate.ingest']" description: "Add example for simulate ingest request" diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 57c51d5014..95fb60ed85 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -16615,68 +16615,218 @@ "x-state": "Added in 1.3.0" } }, - "/_inference/{inference_id}": { - "get": { + "/_inference/chat_completion/{inference_id}/_stream": { + "post": { "tags": [ "inference" ], - "summary": "Get an inference endpoint", - "operationId": "inference-get-1", + "summary": "Perform chat completion inference", + "operationId": "inference-chat-completion-unified", "parameters": [ { - "$ref": "#/components/parameters/inference.get#inference_id" + "in": "path", + "name": "inference_id", + "description": "The inference Id", + 
"required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "messages": { + "description": "A list of objects representing the conversation.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference.chat_completion_unified:Message" + } + }, + "model": { + "description": "The ID of the model to use.", + "type": "string" + }, + "max_completion_tokens": { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "type": "number" + }, + "stop": { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "type": "array", + "items": { + "type": "string" + } + }, + "temperature": { + "description": "The sampling temperature to use.", + "type": "number" + }, + "tool_choice": { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolType" + }, + "tools": { + "description": "A list of tools that the model can call.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionTool" + } + }, + "top_p": { + "description": "Nucleus sampling, an alternative to sampling with temperature.", + "type": "number" + } + }, + "required": [ + "messages" + ] + } + } + } + }, "responses": { "200": { - "$ref": "#/components/responses/inference.get#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/_types:StreamResult" + } + } + } } }, - "x-state": "Added in 8.11.0" - }, - "put": { + "x-state": "Added in 
8.18.0" + } + }, + "/_inference/completion/{inference_id}": { + "post": { "tags": [ "inference" ], - "summary": "Create an inference endpoint", - "description": "When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "operationId": "inference-put", + "summary": "Perform completion inference on the service", + "operationId": "inference-completion", "parameters": [ { - "$ref": "#/components/parameters/inference.put#inference_id" + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], "requestBody": { - "$ref": 
"#/components/requestBodies/inference.put" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "description": "Inference input.\nEither a string or an array of strings.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.put#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:CompletionInferenceResult" + } + } + } } }, "x-state": "Added in 8.11.0" - }, - "post": { + } + }, + "/_inference/{inference_id}": { + "get": { "tags": [ "inference" ], - "summary": "Perform inference on the service", - "description": "This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "operationId": "inference-inference", + "summary": "Get an inference endpoint", + "operationId": "inference-get-1", "parameters": [ { - "$ref": "#/components/parameters/inference.inference#inference_id" - }, + "$ref": "#/components/parameters/inference.get#inference_id" + } + ], + "responses": { + "200": { + "$ref": "#/components/responses/inference.get#200" + } + }, + "x-state": "Added in 8.11.0" + }, + "put": { + "tags": [ + "inference" + ], + "summary": "Create an inference endpoint", + "description": "When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "operationId": "inference-put", + "parameters": [ { - "$ref": "#/components/parameters/inference.inference#timeout" + "$ref": "#/components/parameters/inference.put#inference_id" } ], "requestBody": 
{ - "$ref": "#/components/requestBodies/inference.inference" + "$ref": "#/components/requestBodies/inference.put" }, "responses": { "200": { - "$ref": "#/components/responses/inference.inference#200" + "$ref": "#/components/responses/inference.put#200" } }, "x-state": "Added in 8.11.0" @@ -16753,34 +16903,6 @@ }, "x-state": "Added in 8.11.0" }, - "post": { - "tags": [ - "inference" - ], - "summary": "Perform inference on the service", - "description": "This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. 
However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "operationId": "inference-inference-1", - "parameters": [ - { - "$ref": "#/components/parameters/inference.inference#task_type" - }, - { - "$ref": "#/components/parameters/inference.inference#inference_id" - }, - { - "$ref": "#/components/parameters/inference.inference#timeout" - } - ], - "requestBody": { - "$ref": "#/components/requestBodies/inference.inference" - }, - "responses": { - "200": { - "$ref": "#/components/responses/inference.inference#200" - } - }, - "x-state": "Added in 8.11.0" - }, "delete": { "tags": [ "inference" @@ -16892,114 +17014,308 @@ "x-state": "Added in 8.16.0" } }, - "/_inference/{inference_id}/_stream": { + "/_inference/rerank/{inference_id}": { "post": { "tags": [ "inference" ], - "summary": "Perform streaming inference", - "description": "Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). 
You must use a client that supports streaming.", - "operationId": "inference-stream-inference", + "summary": "Perform reranking inference on the service", + "operationId": "inference-rerank", "parameters": [ { - "$ref": "#/components/parameters/inference.stream_inference#inference_id" + "in": "path", + "name": "inference_id", + "description": "The unique identifier for the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "The amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" + } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.stream_inference" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "query": { + "description": "Query input.", + "type": "string" + }, + "input": { + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "query", + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.stream_inference#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:RerankedInferenceResult" + } + } + } } }, - "x-state": "Added in 8.16.0" + "x-state": "Added in 8.11.0" } }, - "/_inference/{task_type}/{inference_id}/_stream": { + "/_inference/sparse_embedding/{inference_id}": { "post": { "tags": [ "inference" ], - "summary": 
"Perform streaming inference", - "description": "Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.", - "operationId": "inference-stream-inference-1", + "summary": "Perform sparse embedding inference on the service", + "operationId": "inference-sparse-embedding", "parameters": [ { - "$ref": "#/components/parameters/inference.stream_inference#task_type" + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" }, { - "$ref": "#/components/parameters/inference.stream_inference#inference_id" + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.stream_inference" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "description": "Inference input.\nEither a string or an 
array of strings.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.stream_inference#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:SparseEmbeddingInferenceResult" + } + } + } } }, - "x-state": "Added in 8.16.0" + "x-state": "Added in 8.11.0" } }, - "/_inference/{inference_id}/_unified": { + "/_inference/completion/{inference_id}/_stream": { "post": { "tags": [ "inference" ], - "summary": "Perform inference on the service using the Unified Schema", - "operationId": "inference-unified-inference", + "summary": "Perform streaming inference", + "description": "Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). 
You must use a client that supports streaming.", + "operationId": "inference-stream-completion", "parameters": [ { - "$ref": "#/components/parameters/inference.unified_inference#inference_id" - }, - { - "$ref": "#/components/parameters/inference.unified_inference#timeout" + "in": "path", + "name": "inference_id", + "description": "The unique identifier for the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.unified_inference" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.unified_inference#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/_types:StreamResult" + } + } + } } }, - "x-state": "Added in 8.18.0" + "x-state": "Added in 8.16.0" } }, - "/_inference/{task_type}/{inference_id}/_unified": { + "/_inference/text_embedding/{inference_id}": { "post": { "tags": [ "inference" ], - "summary": "Perform inference on the service using the Unified Schema", - "operationId": "inference-unified-inference-1", + "summary": "Perform text embedding inference on the service", + "operationId": "inference-text-embedding", "parameters": [ { - "$ref": "#/components/parameters/inference.unified_inference#task_type" - }, - { - "$ref": 
"#/components/parameters/inference.unified_inference#inference_id" + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" }, { - "$ref": "#/components/parameters/inference.unified_inference#timeout" + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.unified_inference" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "description": "Inference input.\nEither a string or an array of strings.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.unified_inference#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:TextEmbeddingInferenceResult" + } + } + } } }, - "x-state": "Added in 8.18.0" + "x-state": "Added in 8.11.0" } }, "/_inference/{inference_id}/_update": { - "post": { + "put": { "tags": [ "inference" ], @@ -17023,7 +17339,7 @@ } }, "/_inference/{task_type}/{inference_id}/_update": { - "post": { + "put": { "tags": [ "inference" ], @@ -63246,7 +63562,9 @@ "type": "object", "additionalProperties": { "$ref": "#/components/schemas/_types.mapping:DynamicTemplate" - } + }, + "minProperties": 1, + "maxProperties": 1 } }, "_field_names": { @@ -64335,6 +64653,12 @@ "index": { "type": "boolean" }, + "script": { + "$ref": "#/components/schemas/_types:Script" + }, + 
"on_script_error": { + "$ref": "#/components/schemas/_types.mapping:OnScriptError" + }, "null_value": { "$ref": "#/components/schemas/_types:DateTime" }, @@ -64377,6 +64701,12 @@ "index": { "type": "boolean" }, + "script": { + "$ref": "#/components/schemas/_types:Script" + }, + "on_script_error": { + "$ref": "#/components/schemas/_types.mapping:OnScriptError" + }, "null_value": { "$ref": "#/components/schemas/_types:DateTime" }, @@ -74154,315 +74484,11 @@ "valid" ] }, - "inference._types:TaskType": { - "type": "string", - "enum": [ - "sparse_embedding", - "text_embedding", - "rerank", - "completion" - ] - }, - "inference._types:DeleteInferenceEndpointResult": { - "allOf": [ - { - "$ref": "#/components/schemas/_types:AcknowledgedResponseBase" - }, - { - "type": "object", - "properties": { - "pipelines": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "pipelines" - ] - } - ] - }, - "inference._types:InferenceEndpointInfo": { - "allOf": [ - { - "$ref": "#/components/schemas/inference._types:InferenceEndpoint" - }, - { - "type": "object", - "properties": { - "inference_id": { - "description": "The inference Id", - "type": "string" - }, - "task_type": { - "$ref": "#/components/schemas/inference._types:TaskType" - } - }, - "required": [ - "inference_id", - "task_type" - ] - } - ] - }, - "inference._types:InferenceEndpoint": { - "type": "object", - "properties": { - "chunking_settings": { - "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" - }, - "service": { - "description": "The service type", - "type": "string" - }, - "service_settings": { - "$ref": "#/components/schemas/inference._types:ServiceSettings" - }, - "task_settings": { - "$ref": "#/components/schemas/inference._types:TaskSettings" - } - }, - "required": [ - "service", - "service_settings" - ] - }, - "inference._types:InferenceChunkingSettings": { - "allOf": [ - { - "$ref": "#/components/schemas/inference._types:InferenceEndpoint" - }, - { - "type": 
"object", - "properties": { - "max_chunk_size": { - "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", - "type": "number" - }, - "overlap": { - "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", - "type": "number" - }, - "sentence_overlap": { - "description": "Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", - "type": "number" - }, - "strategy": { - "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", - "type": "string" - } - } - } - ] - }, - "inference._types:ServiceSettings": { - "type": "object" - }, - "inference._types:TaskSettings": { - "type": "object" - }, - "inference._types:InferenceResult": { - "description": "InferenceResult is an aggregation of mutually exclusive variants", - "type": "object", - "properties": { - "text_embedding_bytes": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" - } - }, - "text_embedding_bits": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" - } - }, - "text_embedding": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:TextEmbeddingResult" - } - }, - "sparse_embedding": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:SparseEmbeddingResult" - } - }, - "completion": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:CompletionResult" - } - }, - "rerank": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:RankedDocument" - } - } - }, - "minProperties": 1, - "maxProperties": 1 - }, - "inference._types:TextEmbeddingByteResult": { - 
"type": "object", - "properties": { - "embedding": { - "$ref": "#/components/schemas/inference._types:DenseByteVector" - } - }, - "required": [ - "embedding" - ] - }, - "inference._types:DenseByteVector": { - "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.", - "type": "array", - "items": { - "$ref": "#/components/schemas/_types:byte" - } - }, - "inference._types:TextEmbeddingResult": { - "type": "object", - "properties": { - "embedding": { - "$ref": "#/components/schemas/inference._types:DenseVector" - } - }, - "required": [ - "embedding" - ] - }, - "inference._types:DenseVector": { - "description": "Text Embedding results are represented as Dense Vectors\nof floats.", - "type": "array", - "items": { - "type": "number" - } - }, - "inference._types:SparseEmbeddingResult": { - "type": "object", - "properties": { - "embedding": { - "$ref": "#/components/schemas/inference._types:SparseVector" - } - }, - "required": [ - "embedding" - ] - }, - "inference._types:SparseVector": { - "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.", - "type": "object", - "additionalProperties": { - "type": "number" - } - }, - "inference._types:CompletionResult": { - "type": "object", - "properties": { - "result": { - "type": "string" - } - }, - "required": [ - "result" - ] - }, - "inference._types:RankedDocument": { - "type": "object", - "properties": { - "index": { - "type": "number" - }, - "relevance_score": { - "type": "number" - }, - "text": { - "type": "string" - } - }, - "required": [ - "index", - "relevance_score" - ] - }, - "inference.put_watsonx:WatsonxTaskType": { - "type": "string", - "enum": [ - "text_embedding" - ] - }, - "inference.put_watsonx:ServiceType": { - "type": "string", - "enum": [ - "watsonxai" - ] - }, - "inference.put_watsonx:WatsonxServiceSettings": { - "type": "object", - "properties": { - "api_key": { - "externalDocs": { - "url": "https://cloud.ibm.com/iam/apikeys" - 
}, - "description": "A valid API key of your Watsonx account.\nYou can find your Watsonx API keys or you can create a new one on the API keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", - "type": "string" - }, - "api_version": { - "externalDocs": { - "url": "https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates" - }, - "description": "A version parameter that takes a version date in the format of `YYYY-MM-DD`.\nFor the active version data parameters, refer to the Wastonx documentation.", - "type": "string" - }, - "model_id": { - "externalDocs": { - "url": "https://www.ibm.com/products/watsonx-ai/foundation-models" - }, - "description": "The name of the model to use for the inference task.\nRefer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models.", - "type": "string" - }, - "project_id": { - "description": "The identifier of the IBM Cloud project to use for the inference task.", - "type": "string" - }, - "rate_limit": { - "$ref": "#/components/schemas/inference._types:RateLimitSetting" - }, - "url": { - "description": "The URL of the inference endpoint that you created on Watsonx.", - "type": "string" - } - }, - "required": [ - "api_key", - "api_version", - "model_id", - "project_id", - "url" - ] - }, - "inference._types:RateLimitSetting": { - "type": "object", - "properties": { - "requests_per_minute": { - "description": "The number of requests allowed per minute.", - "type": "number" - } - } - }, - "_types:StreamResult": { - "type": "object" - }, - "inference.unified_inference:Message": { + "inference.chat_completion_unified:Message": { "type": "object", "properties": { 
"content": { - "$ref": "#/components/schemas/inference.unified_inference:MessageContent" + "$ref": "#/components/schemas/inference.chat_completion_unified:MessageContent" }, "role": { "description": "The role of the message author.", @@ -74475,7 +74501,7 @@ "description": "The tool calls generated by the model.", "type": "array", "items": { - "$ref": "#/components/schemas/inference.unified_inference:ToolCall" + "$ref": "#/components/schemas/inference.chat_completion_unified:ToolCall" } } }, @@ -74483,7 +74509,7 @@ "role" ] }, - "inference.unified_inference:MessageContent": { + "inference.chat_completion_unified:MessageContent": { "oneOf": [ { "type": "string" @@ -74491,12 +74517,12 @@ { "type": "array", "items": { - "$ref": "#/components/schemas/inference.unified_inference:ContentObject" + "$ref": "#/components/schemas/inference.chat_completion_unified:ContentObject" } } ] }, - "inference.unified_inference:ContentObject": { + "inference.chat_completion_unified:ContentObject": { "type": "object", "properties": { "text": { @@ -74513,14 +74539,14 @@ "type" ] }, - "inference.unified_inference:ToolCall": { + "inference.chat_completion_unified:ToolCall": { "type": "object", "properties": { "id": { "$ref": "#/components/schemas/_types:Id" }, "function": { - "$ref": "#/components/schemas/inference.unified_inference:ToolCallFunction" + "$ref": "#/components/schemas/inference.chat_completion_unified:ToolCallFunction" }, "type": { "description": "The type of the tool call.", @@ -74533,7 +74559,7 @@ "type" ] }, - "inference.unified_inference:ToolCallFunction": { + "inference.chat_completion_unified:ToolCallFunction": { "type": "object", "properties": { "arguments": { @@ -74550,17 +74576,17 @@ "name" ] }, - "inference.unified_inference:CompletionToolType": { + "inference.chat_completion_unified:CompletionToolType": { "oneOf": [ { "type": "string" }, { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolChoice" + "$ref": 
"#/components/schemas/inference.chat_completion_unified:CompletionToolChoice" } ] }, - "inference.unified_inference:CompletionToolChoice": { + "inference.chat_completion_unified:CompletionToolChoice": { "type": "object", "properties": { "type": { @@ -74568,7 +74594,7 @@ "type": "string" }, "function": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolChoiceFunction" + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoiceFunction" } }, "required": [ @@ -74576,7 +74602,7 @@ "function" ] }, - "inference.unified_inference:CompletionToolChoiceFunction": { + "inference.chat_completion_unified:CompletionToolChoiceFunction": { "type": "object", "properties": { "name": { @@ -74588,7 +74614,7 @@ "name" ] }, - "inference.unified_inference:CompletionTool": { + "inference.chat_completion_unified:CompletionTool": { "type": "object", "properties": { "type": { @@ -74596,7 +74622,7 @@ "type": "string" }, "function": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolFunction" + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolFunction" } }, "required": [ @@ -74604,7 +74630,7 @@ "function" ] }, - "inference.unified_inference:CompletionToolFunction": { + "inference.chat_completion_unified:CompletionToolFunction": { "type": "object", "properties": { "description": { @@ -74628,6 +74654,335 @@ "name" ] }, + "_types:StreamResult": { + "type": "object" + }, + "inference._types:TaskSettings": { + "type": "object" + }, + "inference._types:CompletionInferenceResult": { + "type": "object", + "properties": { + "completion": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:CompletionResult" + } + } + }, + "required": [ + "completion" + ] + }, + "inference._types:CompletionResult": { + "type": "object", + "properties": { + "result": { + "type": "string" + } + }, + "required": [ + "result" + ] + }, + "inference._types:TaskType": { + "type": "string", + 
"enum": [ + "sparse_embedding", + "text_embedding", + "rerank", + "completion", + "chat_completion" + ] + }, + "inference._types:DeleteInferenceEndpointResult": { + "allOf": [ + { + "$ref": "#/components/schemas/_types:AcknowledgedResponseBase" + }, + { + "type": "object", + "properties": { + "pipelines": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "pipelines" + ] + } + ] + }, + "inference._types:InferenceEndpointInfo": { + "allOf": [ + { + "$ref": "#/components/schemas/inference._types:InferenceEndpoint" + }, + { + "type": "object", + "properties": { + "inference_id": { + "description": "The inference Id", + "type": "string" + }, + "task_type": { + "$ref": "#/components/schemas/inference._types:TaskType" + } + }, + "required": [ + "inference_id", + "task_type" + ] + } + ] + }, + "inference._types:InferenceEndpoint": { + "type": "object", + "properties": { + "chunking_settings": { + "$ref": "#/components/schemas/inference._types:InferenceChunkingSettings" + }, + "service": { + "description": "The service type", + "type": "string" + }, + "service_settings": { + "$ref": "#/components/schemas/inference._types:ServiceSettings" + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "service", + "service_settings" + ] + }, + "inference._types:InferenceChunkingSettings": { + "allOf": [ + { + "$ref": "#/components/schemas/inference._types:InferenceEndpoint" + }, + { + "type": "object", + "properties": { + "max_chunk_size": { + "description": "Specifies the maximum size of a chunk in words\nThis value cannot be higher than `300` or lower than `20` (for `sentence` strategy) or `10` (for `word` strategy)", + "type": "number" + }, + "overlap": { + "description": "Specifies the number of overlapping words for chunks\nOnly for `word` chunking strategy\nThis value cannot be higher than the half of `max_chunk_size`", + "type": "number" + }, + "sentence_overlap": { + "description": 
"Specifies the number of overlapping sentences for chunks\nOnly for `sentence` chunking strategy\nIt can be either `1` or `0`", + "type": "number" + }, + "strategy": { + "description": "Specifies the chunking strategy\nIt could be either `sentence` or `word`", + "type": "string" + } + } + } + ] + }, + "inference._types:ServiceSettings": { + "type": "object" + }, + "inference.put_watsonx:WatsonxTaskType": { + "type": "string", + "enum": [ + "text_embedding" + ] + }, + "inference.put_watsonx:ServiceType": { + "type": "string", + "enum": [ + "watsonxai" + ] + }, + "inference.put_watsonx:WatsonxServiceSettings": { + "type": "object", + "properties": { + "api_key": { + "externalDocs": { + "url": "https://cloud.ibm.com/iam/apikeys" + }, + "description": "A valid API key of your Watsonx account.\nYou can find your Watsonx API keys or you can create a new one on the API keys page.\n\nIMPORTANT: You need to provide the API key only once, during the inference model creation.\nThe get inference endpoint API does not retrieve your API key.\nAfter creating the inference model, you cannot change the associated API key.\nIf you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.", + "type": "string" + }, + "api_version": { + "externalDocs": { + "url": "https://cloud.ibm.com/apidocs/watsonx-ai#active-version-dates" + }, + "description": "A version parameter that takes a version date in the format of `YYYY-MM-DD`.\nFor the active version data parameters, refer to the Watsonx documentation.", + "type": "string" + }, + "model_id": { + "externalDocs": { + "url": "https://www.ibm.com/products/watsonx-ai/foundation-models" + }, + "description": "The name of the model to use for the inference task.\nRefer to the IBM Embedding Models section in the Watsonx documentation for the list of available text embedding models.", + "type": "string" + }, + "project_id": { + "description": "The identifier of the IBM Cloud project to 
use for the inference task.", + "type": "string" + }, + "rate_limit": { + "$ref": "#/components/schemas/inference._types:RateLimitSetting" + }, + "url": { + "description": "The URL of the inference endpoint that you created on Watsonx.", + "type": "string" + } + }, + "required": [ + "api_key", + "api_version", + "model_id", + "project_id", + "url" + ] + }, + "inference._types:RateLimitSetting": { + "type": "object", + "properties": { + "requests_per_minute": { + "description": "The number of requests allowed per minute.", + "type": "number" + } + } + }, + "inference._types:RerankedInferenceResult": { + "type": "object", + "properties": { + "rerank": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:RankedDocument" + } + } + }, + "required": [ + "rerank" + ] + }, + "inference._types:RankedDocument": { + "type": "object", + "properties": { + "index": { + "type": "number" + }, + "relevance_score": { + "type": "number" + }, + "text": { + "type": "string" + } + }, + "required": [ + "index", + "relevance_score" + ] + }, + "inference._types:SparseEmbeddingInferenceResult": { + "type": "object", + "properties": { + "sparse_embedding": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:SparseEmbeddingResult" + } + } + }, + "required": [ + "sparse_embedding" + ] + }, + "inference._types:SparseEmbeddingResult": { + "type": "object", + "properties": { + "embedding": { + "$ref": "#/components/schemas/inference._types:SparseVector" + } + }, + "required": [ + "embedding" + ] + }, + "inference._types:SparseVector": { + "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.", + "type": "object", + "additionalProperties": { + "type": "number" + } + }, + "inference._types:TextEmbeddingInferenceResult": { + "description": "TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants", + "type": "object", + "properties": { + "text_embedding_bytes": { + 
"type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" + } + }, + "text_embedding_bits": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" + } + }, + "text_embedding": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:TextEmbeddingResult" + } + } + }, + "minProperties": 1, + "maxProperties": 1 + }, + "inference._types:TextEmbeddingByteResult": { + "type": "object", + "properties": { + "embedding": { + "$ref": "#/components/schemas/inference._types:DenseByteVector" + } + }, + "required": [ + "embedding" + ] + }, + "inference._types:DenseByteVector": { + "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.", + "type": "array", + "items": { + "$ref": "#/components/schemas/_types:byte" + } + }, + "inference._types:TextEmbeddingResult": { + "type": "object", + "properties": { + "embedding": { + "$ref": "#/components/schemas/inference._types:DenseVector" + } + }, + "required": [ + "embedding" + ] + }, + "inference._types:DenseVector": { + "description": "Text Embedding results are represented as Dense Vectors\nof floats.", + "type": "array", + "items": { + "type": "number" + } + }, "_types:ElasticsearchVersionInfo": { "type": "object", "properties": { @@ -88196,7 +88551,22 @@ } }, "field": { - "$ref": "#/components/schemas/security._types:FieldRule" + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "$ref": "#/components/schemas/_types:FieldValue" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/_types:FieldValue" + } + } + ] + }, + "minProperties": 1, + "maxProperties": 1 }, "except": { "$ref": "#/components/schemas/security._types:RoleMappingRule" @@ -88205,22 +88575,6 @@ "minProperties": 1, "maxProperties": 1 }, - "security._types:FieldRule": { - "type": "object", - "properties": { - "username": { - "$ref": "#/components/schemas/_types:Names" - 
}, - "dn": { - "$ref": "#/components/schemas/_types:Names" - }, - "groups": { - "$ref": "#/components/schemas/_types:Names" - } - }, - "minProperties": 1, - "maxProperties": 1 - }, "security.get_service_accounts:RoleDescriptorWrapper": { "type": "object", "properties": { @@ -96427,16 +96781,6 @@ } } }, - "inference.inference#200": { - "description": "", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/inference._types:InferenceResult" - } - } - } - }, "inference.put#200": { "description": "", "content": { @@ -96447,26 +96791,6 @@ } } }, - "inference.stream_inference#200": { - "description": "", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/_types:StreamResult" - } - } - } - }, - "inference.unified_inference#200": { - "description": "", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/_types:StreamResult" - } - } - } - }, "inference.update#200": { "description": "", "content": { @@ -104241,38 +104565,6 @@ }, "style": "simple" }, - "inference.inference#task_type": { - "in": "path", - "name": "task_type", - "description": "The type of inference task that the model performs.", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/inference._types:TaskType" - }, - "style": "simple" - }, - "inference.inference#inference_id": { - "in": "path", - "name": "inference_id", - "description": "The unique identifier for the inference endpoint.", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Id" - }, - "style": "simple" - }, - "inference.inference#timeout": { - "in": "query", - "name": "timeout", - "description": "The amount of time to wait for the inference request to complete.", - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Duration" - }, - "style": "form" - }, "inference.put#task_type": { "in": "path", "name": "task_type", @@ -104295,60 +104587,6 @@ }, 
"style": "simple" }, - "inference.stream_inference#inference_id": { - "in": "path", - "name": "inference_id", - "description": "The unique identifier for the inference endpoint.", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Id" - }, - "style": "simple" - }, - "inference.stream_inference#task_type": { - "in": "path", - "name": "task_type", - "description": "The type of task that the model performs.", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/inference._types:TaskType" - }, - "style": "simple" - }, - "inference.unified_inference#task_type": { - "in": "path", - "name": "task_type", - "description": "The task type", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/inference._types:TaskType" - }, - "style": "simple" - }, - "inference.unified_inference#inference_id": { - "in": "path", - "name": "inference_id", - "description": "The inference Id", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Id" - }, - "style": "simple" - }, - "inference.unified_inference#timeout": { - "in": "query", - "name": "timeout", - "description": "Specifies the amount of time to wait for the inference request to complete.", - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Duration" - }, - "style": "form" - }, "inference.update#inference_id": { "in": "path", "name": "inference_id", @@ -109602,7 +109840,9 @@ "type": "object", "additionalProperties": { "$ref": "#/components/schemas/_types.mapping:DynamicTemplate" - } + }, + "minProperties": 1, + "maxProperties": 1 } }, "_field_names": { @@ -109840,41 +110080,6 @@ } } }, - "inference.inference": { - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "query": { - "description": "The query input, which is required only for the `rerank` task.\nIt is not required for other tasks.", - "type": "string" - 
}, - "input": { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "task_settings": { - "$ref": "#/components/schemas/inference._types:TaskSettings" - } - }, - "required": [ - "input" - ] - } - } - } - }, "inference.put": { "content": { "application/json": { @@ -109885,88 +110090,6 @@ }, "required": true }, - "inference.stream_inference": { - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "input": { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - } - }, - "required": [ - "input" - ] - } - } - } - }, - "inference.unified_inference": { - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "messages": { - "description": "A list of objects representing the conversation.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.unified_inference:Message" - } - }, - "model": { - "description": "The ID of the model to use.", - "type": "string" - }, - "max_completion_tokens": { - "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", - "type": "number" - }, - "stop": { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "description": "The sampling temperature to use.", - "type": "number" - }, - "tool_choice": 
{ - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolType" - }, - "tools": { - "description": "A list of tools that the model can call.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionTool" - } - }, - "top_p": { - "description": "Nucleus sampling, an alternative to sampling with temperature.", - "type": "number" - } - }, - "required": [ - "messages" - ] - } - } - } - }, "inference.update": { "content": { "application/json": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 0295fc22a8..d2aba95a03 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -8828,68 +8828,218 @@ "x-state": "Added in 1.3.0" } }, - "/_inference/{inference_id}": { - "get": { + "/_inference/chat_completion/{inference_id}/_stream": { + "post": { "tags": [ "inference" ], - "summary": "Get an inference endpoint", - "operationId": "inference-get-1", + "summary": "Perform chat completion inference", + "operationId": "inference-chat-completion-unified", "parameters": [ { - "$ref": "#/components/parameters/inference.get#inference_id" + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "messages": { + "description": "A list of objects representing the conversation.", + "type": "array", + "items": { + "$ref": 
"#/components/schemas/inference.chat_completion_unified:Message" + } + }, + "model": { + "description": "The ID of the model to use.", + "type": "string" + }, + "max_completion_tokens": { + "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", + "type": "number" + }, + "stop": { + "description": "A sequence of strings to control when the model should stop generating additional tokens.", + "type": "array", + "items": { + "type": "string" + } + }, + "temperature": { + "description": "The sampling temperature to use.", + "type": "number" + }, + "tool_choice": { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolType" + }, + "tools": { + "description": "A list of tools that the model can call.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionTool" + } + }, + "top_p": { + "description": "Nucleus sampling, an alternative to sampling with temperature.", + "type": "number" + } + }, + "required": [ + "messages" + ] + } + } + } + }, "responses": { "200": { - "$ref": "#/components/responses/inference.get#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/_types:StreamResult" + } + } + } } }, - "x-state": "Added in 8.11.0" - }, - "put": { + "x-state": "Added in 8.18.0" + } + }, + "/_inference/completion/{inference_id}": { + "post": { "tags": [ "inference" ], - "summary": "Create an inference endpoint", - "description": "When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating 
multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "operationId": "inference-put", + "summary": "Perform completion inference on the service", + "operationId": "inference-completion", "parameters": [ { - "$ref": "#/components/parameters/inference.put#inference_id" + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + }, + { + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.put" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "description": "Inference input.\nEither a string or an array of strings.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.put#200" + "description": "", + "content": { + "application/json": { + "schema": { + 
"$ref": "#/components/schemas/inference._types:CompletionInferenceResult" + } + } + } } }, "x-state": "Added in 8.11.0" - }, - "post": { + } + }, + "/_inference/{inference_id}": { + "get": { "tags": [ "inference" ], - "summary": "Perform inference on the service", - "description": "This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "operationId": "inference-inference", + "summary": "Get an inference endpoint", + "operationId": "inference-get-1", "parameters": [ { - "$ref": "#/components/parameters/inference.inference#inference_id" - }, + "$ref": "#/components/parameters/inference.get#inference_id" + } + ], + "responses": { + "200": { + "$ref": "#/components/responses/inference.get#200" + } + }, + "x-state": "Added in 8.11.0" + }, + "put": { + "tags": [ + "inference" + ], + "summary": "Create an inference endpoint", + "description": "When you create an inference endpoint, the associated machine learning model is automatically deployed if it is not already running.\nAfter creating the endpoint, wait for the model deployment to complete before using it.\nTo verify the deployment status, use the get trained model statistics API.\nLook for `\"state\": \"fully_allocated\"` in 
the response and ensure that the `\"allocation_count\"` matches the `\"target_allocation_count\"`.\nAvoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "operationId": "inference-put", + "parameters": [ { - "$ref": "#/components/parameters/inference.inference#timeout" + "$ref": "#/components/parameters/inference.put#inference_id" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.inference" + "$ref": "#/components/requestBodies/inference.put" }, "responses": { "200": { - "$ref": "#/components/responses/inference.inference#200" + "$ref": "#/components/responses/inference.put#200" } }, "x-state": "Added in 8.11.0" @@ -8966,34 +9116,6 @@ }, "x-state": "Added in 8.11.0" }, - "post": { - "tags": [ - "inference" - ], - "summary": "Perform inference on the service", - "description": "This API enables you to use machine learning models to perform specific tasks on data that you provide as an input.\nIt returns a response with the results of the tasks.\nThe inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.\n\n> info\n> The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging 
Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "operationId": "inference-inference-1", - "parameters": [ - { - "$ref": "#/components/parameters/inference.inference#task_type" - }, - { - "$ref": "#/components/parameters/inference.inference#inference_id" - }, - { - "$ref": "#/components/parameters/inference.inference#timeout" - } - ], - "requestBody": { - "$ref": "#/components/requestBodies/inference.inference" - }, - "responses": { - "200": { - "$ref": "#/components/responses/inference.inference#200" - } - }, - "x-state": "Added in 8.11.0" - }, "delete": { "tags": [ "inference" @@ -9105,59 +9227,237 @@ "x-state": "Added in 8.16.0" } }, - "/_inference/{inference_id}/_unified": { + "/_inference/rerank/{inference_id}": { "post": { "tags": [ "inference" ], - "summary": "Perform inference on the service using the Unified Schema", - "operationId": "inference-unified-inference", + "summary": "Perform reranking inference on the service", + "operationId": "inference-rerank", "parameters": [ { - "$ref": "#/components/parameters/inference.unified_inference#inference_id" + "in": "path", + "name": "inference_id", + "description": "The unique identifier for the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" }, { - "$ref": "#/components/parameters/inference.unified_inference#timeout" + "in": "query", + "name": "timeout", + "description": "The amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.unified_inference" + "content": { + 
"application/json": { + "schema": { + "type": "object", + "properties": { + "query": { + "description": "Query input.", + "type": "string" + }, + "input": { + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "query", + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": "#/components/responses/inference.unified_inference#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:RerankedInferenceResult" + } + } + } } }, - "x-state": "Added in 8.18.0" + "x-state": "Added in 8.11.0" } }, - "/_inference/{task_type}/{inference_id}/_unified": { + "/_inference/sparse_embedding/{inference_id}": { "post": { "tags": [ "inference" ], - "summary": "Perform inference on the service using the Unified Schema", - "operationId": "inference-unified-inference-1", + "summary": "Perform sparse embedding inference on the service", + "operationId": "inference-sparse-embedding", "parameters": [ { - "$ref": "#/components/parameters/inference.unified_inference#task_type" + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" }, { - "$ref": "#/components/parameters/inference.unified_inference#inference_id" + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" + } + ], + "requestBody": { 
+ "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "description": "Inference input.\nEither a string or an array of strings.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "input" + ] + } + } + } + }, + "responses": { + "200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:SparseEmbeddingInferenceResult" + } + } + } + } + }, + "x-state": "Added in 8.11.0" + } + }, + "/_inference/text_embedding/{inference_id}": { + "post": { + "tags": [ + "inference" + ], + "summary": "Perform text embedding inference on the service", + "operationId": "inference-text-embedding", + "parameters": [ + { + "in": "path", + "name": "inference_id", + "description": "The inference Id", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" }, { - "$ref": "#/components/parameters/inference.unified_inference#timeout" + "in": "query", + "name": "timeout", + "description": "Specifies the amount of time to wait for the inference request to complete.", + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Duration" + }, + "style": "form" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.unified_inference" + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "input": { + "description": "Inference input.\nEither a string or an array of strings.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + }, + "task_settings": { + "$ref": "#/components/schemas/inference._types:TaskSettings" + } + }, + "required": [ + "input" + ] + } + } + } }, "responses": { "200": { - "$ref": 
"#/components/responses/inference.unified_inference#200" + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:TextEmbeddingInferenceResult" + } + } + } } }, - "x-state": "Added in 8.18.0" + "x-state": "Added in 8.11.0" } }, "/": { @@ -40954,7 +41254,9 @@ "type": "object", "additionalProperties": { "$ref": "#/components/schemas/_types.mapping:DynamicTemplate" - } + }, + "minProperties": 1, + "maxProperties": 1 } }, "_field_names": { @@ -42043,6 +42345,12 @@ "index": { "type": "boolean" }, + "script": { + "$ref": "#/components/schemas/_types:Script" + }, + "on_script_error": { + "$ref": "#/components/schemas/_types.mapping:OnScriptError" + }, "null_value": { "$ref": "#/components/schemas/_types:DateTime" }, @@ -42085,6 +42393,12 @@ "index": { "type": "boolean" }, + "script": { + "$ref": "#/components/schemas/_types:Script" + }, + "on_script_error": { + "$ref": "#/components/schemas/_types.mapping:OnScriptError" + }, "null_value": { "$ref": "#/components/schemas/_types:DateTime" }, @@ -46404,13 +46718,215 @@ "valid" ] }, + "inference.chat_completion_unified:Message": { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/inference.chat_completion_unified:MessageContent" + }, + "role": { + "description": "The role of the message author.", + "type": "string" + }, + "tool_call_id": { + "$ref": "#/components/schemas/_types:Id" + }, + "tool_calls": { + "description": "The tool calls generated by the model.", + "type": "array", + "items": { + "$ref": "#/components/schemas/inference.chat_completion_unified:ToolCall" + } + } + }, + "required": [ + "role" + ] + }, + "inference.chat_completion_unified:MessageContent": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference.chat_completion_unified:ContentObject" + } + } + ] + }, + "inference.chat_completion_unified:ContentObject": { + "type": "object", + 
"properties": { + "text": { + "description": "The text content.", + "type": "string" + }, + "type": { + "description": "The type of content.", + "type": "string" + } + }, + "required": [ + "text", + "type" + ] + }, + "inference.chat_completion_unified:ToolCall": { + "type": "object", + "properties": { + "id": { + "$ref": "#/components/schemas/_types:Id" + }, + "function": { + "$ref": "#/components/schemas/inference.chat_completion_unified:ToolCallFunction" + }, + "type": { + "description": "The type of the tool call.", + "type": "string" + } + }, + "required": [ + "id", + "function", + "type" + ] + }, + "inference.chat_completion_unified:ToolCallFunction": { + "type": "object", + "properties": { + "arguments": { + "description": "The arguments to call the function with in JSON format.", + "type": "string" + }, + "name": { + "description": "The name of the function to call.", + "type": "string" + } + }, + "required": [ + "arguments", + "name" + ] + }, + "inference.chat_completion_unified:CompletionToolType": { + "oneOf": [ + { + "type": "string" + }, + { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoice" + } + ] + }, + "inference.chat_completion_unified:CompletionToolChoice": { + "type": "object", + "properties": { + "type": { + "description": "The type of the tool.", + "type": "string" + }, + "function": { + "$ref": "#/components/schemas/inference.chat_completion_unified:CompletionToolChoiceFunction" + } + }, + "required": [ + "type", + "function" + ] + }, + "inference.chat_completion_unified:CompletionToolChoiceFunction": { + "type": "object", + "properties": { + "name": { + "description": "The name of the function to call.", + "type": "string" + } + }, + "required": [ + "name" + ] + }, + "inference.chat_completion_unified:CompletionTool": { + "type": "object", + "properties": { + "type": { + "description": "The type of tool.", + "type": "string" + }, + "function": { + "$ref": 
"#/components/schemas/inference.chat_completion_unified:CompletionToolFunction" + } + }, + "required": [ + "type", + "function" + ] + }, + "inference.chat_completion_unified:CompletionToolFunction": { + "type": "object", + "properties": { + "description": { + "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", + "type": "string" + }, + "name": { + "description": "The name of the function.", + "type": "string" + }, + "parameters": { + "description": "The parameters the functional accepts. This should be formatted as a JSON object.", + "type": "object" + }, + "strict": { + "description": "Whether to enable schema adherence when generating the function call.", + "type": "boolean" + } + }, + "required": [ + "name" + ] + }, + "_types:StreamResult": { + "type": "object" + }, + "inference._types:TaskSettings": { + "type": "object" + }, + "inference._types:CompletionInferenceResult": { + "type": "object", + "properties": { + "completion": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:CompletionResult" + } + } + }, + "required": [ + "completion" + ] + }, + "inference._types:CompletionResult": { + "type": "object", + "properties": { + "result": { + "type": "string" + } + }, + "required": [ + "result" + ] + }, "inference._types:TaskType": { "type": "string", "enum": [ "sparse_embedding", "text_embedding", "rerank", - "completion" + "completion", + "chat_completion" ] }, "inference._types:DeleteInferenceEndpointResult": { @@ -46510,136 +47026,6 @@ "inference._types:ServiceSettings": { "type": "object" }, - "inference._types:TaskSettings": { - "type": "object" - }, - "inference._types:InferenceResult": { - "description": "InferenceResult is an aggregation of mutually exclusive variants", - "type": "object", - "properties": { - "text_embedding_bytes": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" - } - 
}, - "text_embedding_bits": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" - } - }, - "text_embedding": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:TextEmbeddingResult" - } - }, - "sparse_embedding": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:SparseEmbeddingResult" - } - }, - "completion": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:CompletionResult" - } - }, - "rerank": { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference._types:RankedDocument" - } - } - }, - "minProperties": 1, - "maxProperties": 1 - }, - "inference._types:TextEmbeddingByteResult": { - "type": "object", - "properties": { - "embedding": { - "$ref": "#/components/schemas/inference._types:DenseByteVector" - } - }, - "required": [ - "embedding" - ] - }, - "inference._types:DenseByteVector": { - "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.", - "type": "array", - "items": { - "$ref": "#/components/schemas/_types:byte" - } - }, - "inference._types:TextEmbeddingResult": { - "type": "object", - "properties": { - "embedding": { - "$ref": "#/components/schemas/inference._types:DenseVector" - } - }, - "required": [ - "embedding" - ] - }, - "inference._types:DenseVector": { - "description": "Text Embedding results are represented as Dense Vectors\nof floats.", - "type": "array", - "items": { - "type": "number" - } - }, - "inference._types:SparseEmbeddingResult": { - "type": "object", - "properties": { - "embedding": { - "$ref": "#/components/schemas/inference._types:SparseVector" - } - }, - "required": [ - "embedding" - ] - }, - "inference._types:SparseVector": { - "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.", - "type": "object", - "additionalProperties": { - "type": "number" - } - }, - 
"inference._types:CompletionResult": { - "type": "object", - "properties": { - "result": { - "type": "string" - } - }, - "required": [ - "result" - ] - }, - "inference._types:RankedDocument": { - "type": "object", - "properties": { - "index": { - "type": "number" - }, - "relevance_score": { - "type": "number" - }, - "text": { - "type": "string" - } - }, - "required": [ - "index", - "relevance_score" - ] - }, "inference.put_watsonx:WatsonxTaskType": { "type": "string", "enum": [ @@ -46705,178 +47091,131 @@ } } }, - "inference.unified_inference:Message": { + "inference._types:RerankedInferenceResult": { "type": "object", "properties": { - "content": { - "$ref": "#/components/schemas/inference.unified_inference:MessageContent" - }, - "role": { - "description": "The role of the message author.", - "type": "string" - }, - "tool_call_id": { - "$ref": "#/components/schemas/_types:Id" - }, - "tool_calls": { - "description": "The tool calls generated by the model.", + "rerank": { "type": "array", "items": { - "$ref": "#/components/schemas/inference.unified_inference:ToolCall" + "$ref": "#/components/schemas/inference._types:RankedDocument" } } }, "required": [ - "role" - ] - }, - "inference.unified_inference:MessageContent": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.unified_inference:ContentObject" - } - } + "rerank" ] }, - "inference.unified_inference:ContentObject": { + "inference._types:RankedDocument": { "type": "object", "properties": { - "text": { - "description": "The text content.", - "type": "string" + "index": { + "type": "number" }, - "type": { - "description": "The type of content.", + "relevance_score": { + "type": "number" + }, + "text": { "type": "string" } }, "required": [ - "text", - "type" + "index", + "relevance_score" ] }, - "inference.unified_inference:ToolCall": { + "inference._types:SparseEmbeddingInferenceResult": { "type": "object", "properties": { - "id": { - "$ref": 
"#/components/schemas/_types:Id" - }, - "function": { - "$ref": "#/components/schemas/inference.unified_inference:ToolCallFunction" - }, - "type": { - "description": "The type of the tool call.", - "type": "string" + "sparse_embedding": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:SparseEmbeddingResult" + } } }, "required": [ - "id", - "function", - "type" + "sparse_embedding" ] }, - "inference.unified_inference:ToolCallFunction": { + "inference._types:SparseEmbeddingResult": { "type": "object", "properties": { - "arguments": { - "description": "The arguments to call the function with in JSON format.", - "type": "string" - }, - "name": { - "description": "The name of the function to call.", - "type": "string" + "embedding": { + "$ref": "#/components/schemas/inference._types:SparseVector" } }, "required": [ - "arguments", - "name" + "embedding" ] }, - "inference.unified_inference:CompletionToolType": { - "oneOf": [ - { - "type": "string" - }, - { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolChoice" - } - ] + "inference._types:SparseVector": { + "description": "Sparse Embedding tokens are represented as a dictionary\nof string to double.", + "type": "object", + "additionalProperties": { + "type": "number" + } }, - "inference.unified_inference:CompletionToolChoice": { + "inference._types:TextEmbeddingInferenceResult": { + "description": "TextEmbeddingInferenceResult is an aggregation of mutually exclusive text_embedding variants", "type": "object", "properties": { - "type": { - "description": "The type of the tool.", - "type": "string" + "text_embedding_bytes": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:TextEmbeddingByteResult" + } }, - "function": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolChoiceFunction" + "text_embedding_bits": { + "type": "array", + "items": { + "$ref": 
"#/components/schemas/inference._types:TextEmbeddingByteResult" + } + }, + "text_embedding": { + "type": "array", + "items": { + "$ref": "#/components/schemas/inference._types:TextEmbeddingResult" + } } }, - "required": [ - "type", - "function" - ] + "minProperties": 1, + "maxProperties": 1 }, - "inference.unified_inference:CompletionToolChoiceFunction": { + "inference._types:TextEmbeddingByteResult": { "type": "object", "properties": { - "name": { - "description": "The name of the function to call.", - "type": "string" + "embedding": { + "$ref": "#/components/schemas/inference._types:DenseByteVector" } }, "required": [ - "name" + "embedding" ] }, - "inference.unified_inference:CompletionTool": { - "type": "object", - "properties": { - "type": { - "description": "The type of tool.", - "type": "string" - }, - "function": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolFunction" - } - }, - "required": [ - "type", - "function" - ] + "inference._types:DenseByteVector": { + "description": "Text Embedding results containing bytes are represented as Dense\nVectors of bytes.", + "type": "array", + "items": { + "$ref": "#/components/schemas/_types:byte" + } }, - "inference.unified_inference:CompletionToolFunction": { + "inference._types:TextEmbeddingResult": { "type": "object", "properties": { - "description": { - "description": "A description of what the function does.\nThis is used by the model to choose when and how to call the function.", - "type": "string" - }, - "name": { - "description": "The name of the function.", - "type": "string" - }, - "parameters": { - "description": "The parameters the functional accepts. 
This should be formatted as a JSON object.", - "type": "object" - }, - "strict": { - "description": "Whether to enable schema adherence when generating the function call.", - "type": "boolean" + "embedding": { + "$ref": "#/components/schemas/inference._types:DenseVector" } }, "required": [ - "name" + "embedding" ] }, - "_types:StreamResult": { - "type": "object" + "inference._types:DenseVector": { + "description": "Text Embedding results are represented as Dense Vectors\nof floats.", + "type": "array", + "items": { + "type": "number" + } }, "_types:ElasticsearchVersionInfo": { "type": "object", @@ -57252,16 +57591,6 @@ } } }, - "inference.inference#200": { - "description": "", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/inference._types:InferenceResult" - } - } - } - }, "inference.put#200": { "description": "", "content": { @@ -57272,16 +57601,6 @@ } } }, - "inference.unified_inference#200": { - "description": "", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/_types:StreamResult" - } - } - } - }, "ingest.get_pipeline#200": { "description": "", "content": { @@ -61324,38 +61643,6 @@ }, "style": "simple" }, - "inference.inference#task_type": { - "in": "path", - "name": "task_type", - "description": "The type of inference task that the model performs.", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/inference._types:TaskType" - }, - "style": "simple" - }, - "inference.inference#inference_id": { - "in": "path", - "name": "inference_id", - "description": "The unique identifier for the inference endpoint.", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Id" - }, - "style": "simple" - }, - "inference.inference#timeout": { - "in": "query", - "name": "timeout", - "description": "The amount of time to wait for the inference request to complete.", - "deprecated": false, - "schema": { - "$ref": 
"#/components/schemas/_types:Duration" - }, - "style": "form" - }, "inference.put#task_type": { "in": "path", "name": "task_type", @@ -61378,38 +61665,6 @@ }, "style": "simple" }, - "inference.unified_inference#task_type": { - "in": "path", - "name": "task_type", - "description": "The task type", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/inference._types:TaskType" - }, - "style": "simple" - }, - "inference.unified_inference#inference_id": { - "in": "path", - "name": "inference_id", - "description": "The inference Id", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Id" - }, - "style": "simple" - }, - "inference.unified_inference#timeout": { - "in": "query", - "name": "timeout", - "description": "Specifies the amount of time to wait for the inference request to complete.", - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/_types:Duration" - }, - "style": "form" - }, "ingest.get_pipeline#id": { "in": "path", "name": "id", @@ -64258,7 +64513,9 @@ "type": "object", "additionalProperties": { "$ref": "#/components/schemas/_types.mapping:DynamicTemplate" - } + }, + "minProperties": 1, + "maxProperties": 1 } }, "_field_names": { @@ -64400,41 +64657,6 @@ } } }, - "inference.inference": { - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "query": { - "description": "The query input, which is required only for the `rerank` task.\nIt is not required for other tasks.", - "type": "string" - }, - "input": { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\n> info\n> Inference endpoints for the `completion` task type currently only support a single string as input.", - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - }, - "task_settings": { - "$ref": 
"#/components/schemas/inference._types:TaskSettings" - } - }, - "required": [ - "input" - ] - } - } - } - }, "inference.put": { "content": { "application/json": { @@ -64445,60 +64667,6 @@ }, "required": true }, - "inference.unified_inference": { - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "messages": { - "description": "A list of objects representing the conversation.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.unified_inference:Message" - } - }, - "model": { - "description": "The ID of the model to use.", - "type": "string" - }, - "max_completion_tokens": { - "description": "The upper bound limit for the number of tokens that can be generated for a completion request.", - "type": "number" - }, - "stop": { - "description": "A sequence of strings to control when the model should stop generating additional tokens.", - "type": "array", - "items": { - "type": "string" - } - }, - "temperature": { - "description": "The sampling temperature to use.", - "type": "number" - }, - "tool_choice": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionToolType" - }, - "tools": { - "description": "A list of tools that the model can call.", - "type": "array", - "items": { - "$ref": "#/components/schemas/inference.unified_inference:CompletionTool" - } - }, - "top_p": { - "description": "Nucleus sampling, an alternative to sampling with temperature.", - "type": "number" - } - }, - "required": [ - "messages" - ] - } - } - } - }, "ingest.simulate": { "content": { "application/json": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 9d7da800bd..9274cc67ff 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -80776,7 +80776,7 @@ "namespace": "_builtins" } }, - "singleKey": false, + "singleKey": true, "value": { "kind": "instance_of", "type": { @@ -139918,7 +139918,7 @@ "namespace": "_builtins" } }, - "singleKey": false, + "singleKey": true, 
"value": { "kind": "instance_of", "type": { diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 0ff55fcdd0..06715c0555 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -5899,7 +5899,7 @@ export interface MappingTypeMapping { date_detection?: boolean dynamic?: MappingDynamicMapping dynamic_date_formats?: string[] - dynamic_templates?: Record[] + dynamic_templates?: Partial>[] _field_names?: MappingFieldNamesField index_field?: MappingIndexField _meta?: Metadata @@ -12363,7 +12363,7 @@ export interface IndicesPutMappingRequest extends RequestBase { date_detection?: boolean dynamic?: MappingDynamicMapping dynamic_date_formats?: string[] - dynamic_templates?: Record[] + dynamic_templates?: Partial>[] _field_names?: MappingFieldNamesField _meta?: Metadata numeric_detection?: boolean