From f637e9628430ce32309433dd0e707be2b0849fb8 Mon Sep 17 00:00:00 2001 From: lcawl Date: Tue, 31 Dec 2024 17:43:54 -0800 Subject: [PATCH 1/7] Add inference.stream_inference --- compiler/src/model/utils.ts | 2 +- .../elasticsearch-openapi-overlays.yaml | 10 ++ output/openapi/elasticsearch-openapi.json | 92 ++++++++++++++++++ output/schema/schema.json | 95 ++++++++++++++++++- output/schema/validation-errors.json | 20 ++++ output/typescript/types.ts | 12 +++ specification/_doc_ids/table.csv | 1 + .../StreamInferenceRequest.ts | 57 +++++++++++ .../StreamInferenceRequestExample1.yaml | 6 ++ .../StreamInferenceResponse.ts | 22 +++++ 10 files changed, 312 insertions(+), 5 deletions(-) create mode 100644 specification/inference/stream_inference/StreamInferenceRequest.ts create mode 100644 specification/inference/stream_inference/StreamInferenceRequestExample1.yaml create mode 100644 specification/inference/stream_inference/StreamInferenceResponse.ts diff --git a/compiler/src/model/utils.ts b/compiler/src/model/utils.ts index 06ddabe650..f3452accca 100644 --- a/compiler/src/model/utils.ts +++ b/compiler/src/model/utils.ts @@ -670,7 +670,7 @@ export function hoistRequestAnnotations ( 'manage_enrich', 'manage_ilm', 'manage_index_templates', 'manage_inference', 'manage_ingest_pipelines', 'manage_logstash_pipelines', 'manage_ml', 'manage_oidc', 'manage_own_api_key', 'manage_pipeline', 'manage_rollup', 'manage_saml', 'manage_search_application', 'manage_search_query_rules', 'manage_search_synonyms', 'manage_security', 'manage_service_account', 'manage_slm', 'manage_token', 'manage_transform', 'manage_user_profile', - 'manage_watcher', 'monitor', 'monitor_ml', 'monitor_rollup', 'monitor_snapshot', 'monitor_text_structure', + 'manage_watcher', 'monitor', 'monitor_inference', 'monitor_ml', 'monitor_rollup', 'monitor_snapshot', 'monitor_text_structure', 'monitor_transform', 'monitor_watcher', 'read_ccr', 'read_ilm', 'read_pipeline', 'read_security', 'read_slm', 'transport_client' ] const values = parseCommaSeparated(value) diff --git a/docs/overlays/elasticsearch-openapi-overlays.yaml b/docs/overlays/elasticsearch-openapi-overlays.yaml index 4be0ef8ad7..29291eb394 100644 --- a/docs/overlays/elasticsearch-openapi-overlays.yaml +++ b/docs/overlays/elasticsearch-openapi-overlays.yaml @@ -394,6 +394,7 @@ actions: examples: indicesLegacyPutTemplateRequestExample1: $ref: "../../specification/indices/put_template/indicesPutTemplateRequestExample1.yaml" +<<<<<<< HEAD ## Examples for behavioral analytics - target: "$.paths['/_application/analytics/{collection_name}/event/{event_type}']['post']" description: "Add examples for post analytics collection event operation" @@ -418,10 +419,15 @@ actions: ## Examples for ingest - target: "$.components['requestBodies']['simulate.ingest']" description: "Add example for simulate ingest request" +======= + - target: "$.components['requestBodies']['inference.stream_inference']" + description: "Add example for inference stream request" +>>>>>>> dc0cd555b (Add inference.stream_inference) update: content: application/json: examples: +<<<<<<< HEAD simulateIngestRequestExample1: $ref: "../../specification/simulate/ingest/examples/request/SimulateIngestRequestExample1.yaml" simulateIngestRequestExample2: @@ -567,3 +573,7 @@ actions: updateWatcherSettingsRequestExample1: $ref: "../../specification/watcher/get_settings/examples/200_response/WatcherGetSettingsResponseExample1.yaml" +======= + streamInferenceRequestExample1: + $ref: "../../specification/inference/stream_inference/StreamInferenceRequestExample1.yaml" +>>>>>>> dc0cd555b (Add inference.stream_inference) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 619db3f0ee..f3aaa2201b 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -15985,11 +15985,16 @@ "x-state": "Added in 8.11.0" } }, +<<<<<<< HEAD "/_inference/{inference_id}/_unified": { +======= + "/_inference/{inference_id}/_stream": { +>>>>>>> dc0cd555b (Add inference.stream_inference) "post": { "tags": [ "inference" ], +<<<<<<< HEAD "summary": "Perform inference on the service using the Unified Schema", "operationId": "inference-unified-inference", "parameters": [ @@ -16012,10 +16017,33 @@ } }, "/_inference/{task_type}/{inference_id}/_unified": { +======= + "summary": "Perform streaming inference", + "description": "Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.", + "operationId": "inference-stream-inference", + "parameters": [ + { + "$ref": "#/components/parameters/inference.stream_inference#inference_id" + } + ], + "requestBody": { + "$ref": "#/components/requestBodies/inference.stream_inference" + }, + "responses": { + "200": { + "$ref": "#/components/responses/inference.stream_inference#200" + } + }, + "x-state": "Added in 8.16.0" + } + }, + "/_inference/{task_type}/{inference_id}/_stream": { +>>>>>>> dc0cd555b (Add inference.stream_inference) "post": { "tags": [ "inference" ], +<<<<<<< HEAD "summary": "Perform inference on the service using the Unified Schema", "operationId": "inference-unified-inference-1", "parameters": [ @@ -16038,6 +16066,28 @@ } }, "x-state": "Added in 8.18.0" +======= + "summary": "Perform streaming inference", + "description": "Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.", + "operationId": "inference-stream-inference-1", + "parameters": [ + { + "$ref": "#/components/parameters/inference.stream_inference#task_type" + }, + { + "$ref": "#/components/parameters/inference.stream_inference#inference_id" + } + ], + "requestBody": { + "$ref": "#/components/requestBodies/inference.stream_inference" + }, + "responses": { + "200": { + "$ref": "#/components/responses/inference.stream_inference#200" + } + }, + "x-state": "Added in 8.16.0" +>>>>>>> dc0cd555b (Add inference.stream_inference) } }, "/": { @@ -94178,12 +94228,20 @@ } } }, +<<<<<<< HEAD "inference.unified_inference#200": { +======= + "inference.stream_inference#200": { +>>>>>>> dc0cd555b (Add inference.stream_inference) "description": "", "content": { "application/json": { "schema": { +<<<<<<< HEAD "$ref": "#/components/schemas/_types:StreamResult" +======= + "type": "object" +>>>>>>> dc0cd555b (Add inference.stream_inference) } } } @@ -101627,6 +101685,7 @@ }, "style": "simple" }, +<<<<<<< HEAD "inference.unified_inference#task_type": { "in": "path", "name": "task_type", @@ -101642,6 +101701,12 @@ "in": "path", "name": "inference_id", "description": "The inference Id", +======= + "inference.stream_inference#inference_id": { + "in": "path", + "name": "inference_id", + "description": "The unique identifier for the inference endpoint.", +>>>>>>> dc0cd555b (Add inference.stream_inference) "required": true, "deprecated": false, "schema": { @@ -101649,6 +101714,7 @@ }, "style": "simple" }, +<<<<<<< HEAD "inference.unified_inference#timeout": { "in": "query", "name": "timeout", @@ -101658,6 +101724,18 @@ "$ref": "#/components/schemas/_types:Duration" }, "style": "form" +======= + "inference.stream_inference#task_type": { + "in": "path", + "name": "task_type", + "description": "The type of task that the model performs.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference._types:TaskType" + }, + "style": "simple" +>>>>>>> dc0cd555b (Add inference.stream_inference) }, "ingest.get_geoip_database#id": { "in": "path", @@ -107192,12 +107270,17 @@ }, "required": true }, +<<<<<<< HEAD "inference.unified_inference": { +======= + "inference.stream_inference": { +>>>>>>> dc0cd555b (Add inference.stream_inference) "content": { "application/json": { "schema": { "type": "object", "properties": { +<<<<<<< HEAD "messages": { "description": "A list of objects representing the conversation.", "type": "array", @@ -107241,6 +107324,15 @@ }, "required": [ "messages" +======= + "input": { + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", + "type": "string" + } + }, + "required": [ + "input" +>>>>>>> dc0cd555b (Add inference.stream_inference) ] } } diff --git a/output/schema/schema.json b/output/schema/schema.json index 8fa956c181..dd9c0c1e1e 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -8817,19 +8817,32 @@ { "availability": { "stack": { + "since": "8.16.0", "stability": "stable", "visibility": "public" } }, - "description": "Perform streaming inference", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html", + "description": "Perform streaming inference.\nGet real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.", + "docId": "inference-api-stream", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/stream-inference-api.html", "name": "inference.stream_inference", - "request": null, + "privileges": { + "cluster": [ + "monitor_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.stream_inference" + }, "requestBodyRequired": false, "requestMediaType": [ "application/json" ], - "response": null, + "response": { + "name": "Response", + "namespace": "inference.stream_inference" + }, "responseMediaType": [ "text/event-stream" ], @@ -144176,6 +144189,80 @@ }, "specLocation": "inference/put/PutResponse.ts#L22-L24" }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "properties", + "properties": [ + { + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", + "name": "input", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ] + }, + "description": "Perform streaming inference.\nGet real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.stream_inference" + }, + "path": [ + { + "description": "The unique identifier for the inference endpoint.", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + }, + { + "description": "The type of task that the model performs.", + "name": "task_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskType", + "namespace": "inference._types" + } + } + } + ], + "query": [], + "specLocation": "inference/stream_inference/StreamInferenceRequest.ts#L24-L57" + }, + { + "kind": "response", + "body": { + "kind": "properties", + "properties": [] + }, + "name": { + "name": "Response", + "namespace": "inference.stream_inference" + }, + "specLocation": "inference/stream_inference/StreamInferenceResponse.ts#L20-L22" + }, { "kind": "interface", "description": "A list of tools that the model can call.", diff --git a/output/schema/validation-errors.json b/output/schema/validation-errors.json index 57542f9d1b..3ff0079c44 100644 --- a/output/schema/validation-errors.json +++ b/output/schema/validation-errors.json @@ -259,9 +259,29 @@ ], "response": [] }, +<<<<<<< HEAD "inference.stream_inference": { "request": [ "Missing request & response" +======= + "ingest.delete_geoip_database": { + "request": [ + "Request: query parameter 'master_timeout' does not exist in the json spec", + "Request: query parameter 'timeout' does not exist in the json spec" + ], + "response": [] + }, + "ingest.delete_ip_location_database": { + "request": [ + "Request: query parameter 'master_timeout' does not exist in the json spec", + "Request: query parameter 'timeout' does not exist in the json spec" + ], + "response": [] + }, + "ingest.get_geoip_database": { + "request": [ + "Request: query parameter 'master_timeout' does not exist in the json spec" +>>>>>>> dc0cd555b (Add inference.stream_inference) ], "response": [] }, diff --git a/output/typescript/types.ts b/output/typescript/types.ts index f8b9a2684d..fb2229f057 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -12939,6 +12939,7 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo +<<<<<<< HEAD export interface InferenceUnifiedInferenceCompletionTool { type: string function: InferenceUnifiedInferenceCompletionToolFunction @@ -13003,6 +13004,17 @@ export interface InferenceUnifiedInferenceToolCall { export interface InferenceUnifiedInferenceToolCallFunction { arguments: string name: string +======= +export interface InferenceStreamInferenceRequest extends RequestBase { + inference_id: Id + task_type?: InferenceTaskType + body?: { + input: string + } +} + +export interface InferenceStreamInferenceResponse { +>>>>>>> dc0cd555b (Add inference.stream_inference) } export interface IngestAppendProcessor extends IngestProcessorBase { diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 7b06d87c01..c8c6eae7bf 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -272,6 +272,7 @@ indices-template-exists-v1,https://www.elastic.co/guide/en/elasticsearch/referen indices-templates,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/indices-templates.html indices-update-settings,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/indices-update-settings.html infer-trained-model-deployment,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-trained-model-deployment.html +inference-api-stream,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/stream-inference-api.html inference-processor,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-processor.html info-api,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/info-api.html ingest,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/ingest.html diff --git a/specification/inference/stream_inference/StreamInferenceRequest.ts b/specification/inference/stream_inference/StreamInferenceRequest.ts new file mode 100644 index 0000000000..764496c861 --- /dev/null +++ b/specification/inference/stream_inference/StreamInferenceRequest.ts @@ -0,0 +1,57 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { TaskType } from '@inference/_types/TaskType' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' + +/** + * Perform streaming inference. + * Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation. + * This API works only with the completion task type. + * + * IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs. + * + * This API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming. + * @rest_spec_name inference.stream_inference + * @availability stack since=8.16.0 stability=stable visibility=public + * @cluster_privileges monitor_inference + * @doc_id inference-api-stream + */ +export interface Request extends RequestBase { + path_parts: { + /** + * The unique identifier for the inference endpoint. + */ + inference_id: Id + /** + * The type of task that the model performs. + */ + task_type?: TaskType + } + body: { + /** + * The text on which you want to perform the inference task. + * It can be a single string or an array. + * + * NOTE: Inference endpoints for the completion task type currently only support a single string as input. + */ + input: string + } +} diff --git a/specification/inference/stream_inference/StreamInferenceRequestExample1.yaml b/specification/inference/stream_inference/StreamInferenceRequestExample1.yaml new file mode 100644 index 0000000000..12a0b1cbb0 --- /dev/null +++ b/specification/inference/stream_inference/StreamInferenceRequestExample1.yaml @@ -0,0 +1,6 @@ +summary: Perform a completion task +description: Run `POST _inference/completion/openai-completion/_stream` to perform a completion on the example question with streaming. +# method_request: "POST _inference/completion/openai-completion/_stream" +# type: "request" +value: + input: 'What is Elastic?' diff --git a/specification/inference/stream_inference/StreamInferenceResponse.ts b/specification/inference/stream_inference/StreamInferenceResponse.ts new file mode 100644 index 0000000000..0769bf66cb --- /dev/null +++ b/specification/inference/stream_inference/StreamInferenceResponse.ts @@ -0,0 +1,22 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +export class Response { + body: {} +} From 56ca742a8cc3bcf47df2e280804412f53ecc08b8 Mon Sep 17 00:00:00 2001 From: lcawl Date: Thu, 2 Jan 2025 14:54:39 -0800 Subject: [PATCH 2/7] Add inference.update --- output/openapi/elasticsearch-openapi.json | 93 ++++++++++++++ output/schema/schema.json | 121 ++++++++++++++++++ output/typescript/types.ts | 8 ++ specification/_doc_ids/table.csv | 1 + .../_json_spec/inference.update.json | 45 +++++++ .../update/UpdateInferenceRequest.ts | 51 ++++++++ .../update/UpdateInferenceResponse.ts | 24 ++++ 7 files changed, 343 insertions(+) create mode 100644 specification/_json_spec/inference.update.json create mode 100644 specification/inference/update/UpdateInferenceRequest.ts create mode 100644 specification/inference/update/UpdateInferenceResponse.ts diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index f3aaa2201b..d9f587e45c 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -16090,6 +16090,57 @@ >>>>>>> dc0cd555b (Add inference.stream_inference) } }, + "/_inference/{inference_id}/_update": { + "post": { + "tags": [ + "inference" + ], + "summary": "Update an inference endpoint", + "description": "Modify `task_settings`, secrets (within `service_settings`), or `num_allocations` for an inference endpoint, depending on the specific endpoint service and `task_type`.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "operationId": "inference-update", + "parameters": [ + { + "$ref": "#/components/parameters/inference.update#inference_id" + } + ], + "requestBody": { + "$ref": "#/components/requestBodies/inference.update" + }, + "responses": { + "200": { + "$ref": "#/components/responses/inference.update#200" + } + }, + "x-state": "Added in 8.17.0" + } + }, + "/_inference/{task_type}/{inference_id}/_update": { + "post": { + "tags": [ + "inference" + ], + "summary": "Update an inference endpoint", + "description": "Modify `task_settings`, secrets (within `service_settings`), or `num_allocations` for an inference endpoint, depending on the specific endpoint service and `task_type`.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "operationId": "inference-update-1", + "parameters": [ + { + "$ref": "#/components/parameters/inference.update#task_type" + }, + { + "$ref": "#/components/parameters/inference.update#inference_id" + } + ], + "requestBody": { + "$ref": "#/components/requestBodies/inference.update" + }, + "responses": { + "200": { + "$ref": "#/components/responses/inference.update#200" + } + }, + "x-state": "Added in 8.17.0" + } + }, "/": { "get": { "tags": [ @@ -94246,6 +94297,16 @@ } } }, + "inference.update#200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpointInfo" + } + } + } + }, "ingest.get_geoip_database#200": { "description": "", "content": { @@ -101737,6 +101798,28 @@ "style": "simple" >>>>>>> dc0cd555b (Add inference.stream_inference) }, + "inference.update#inference_id": { + "in": "path", + "name": "inference_id", + "description": "The unique identifier of the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + }, + "inference.update#task_type": { + "in": "path", + "name": "task_type", + "description": "The type of inference task that the model performs.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference._types:TaskType" + }, + "style": "simple" + }, "ingest.get_geoip_database#id": { "in": "path", "name": "id", @@ -107338,6 +107421,16 @@ } } }, + "inference.update": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/inference._types:InferenceEndpoint" + } + } + }, + "required": true + }, "ingest.simulate": { "content": { "application/json": { diff --git a/output/schema/schema.json b/output/schema/schema.json index dd9c0c1e1e..5104fa7853 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -8861,6 +8861,53 @@ } ] }, + { + "availability": { + "stack": { + "since": "8.17.0", + "stability": "stable", + "visibility": "public" + } + }, + "description": "Update an inference endpoint.\n\nModify `task_settings`, secrets (within `service_settings`), or `num_allocations` for an inference endpoint, depending on the specific endpoint service and `task_type`.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "docId": "inference-api-update", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/update-inference-api.html", + "name": "inference.update", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, + "request": { + "name": "Request", + "namespace": "inference.update" + }, + "requestBodyRequired": true, + "requestMediaType": [ + "application/json" + ], + "response": { + "name": "Response", + "namespace": "inference.update" + }, + "responseMediaType": [ + "application/json" + ], + "urls": [ + { + "methods": [ + "POST" + ], + "path": "/_inference/{inference_id}/_update" + }, + { + "methods": [ + "POST" + ], + "path": "/_inference/{task_type}/{inference_id}/_update" + } + ] + }, { "availability": { "serverless": { @@ -144263,6 +144310,80 @@ }, "specLocation": "inference/stream_inference/StreamInferenceResponse.ts#L20-L22" }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "value", + "codegenName": "inference_config", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpoint", + "namespace": "inference._types" + } + } + }, + "description": "Update an inference endpoint.\n\nModify `task_settings`, secrets (within `service_settings`), or `num_allocations` for an inference endpoint, depending on the specific endpoint service and `task_type`.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.update" + }, + "path": [ + { + "description": "The unique identifier of the inference endpoint.", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + }, + { + "description": "The type of inference task that the model performs.", + "name": "task_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskType", + "namespace": "inference._types" + } + } + } + ], + "query": [], + "specLocation": "inference/update/UpdateInferenceRequest.ts#L25-L51" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.update" + }, + "specLocation": "inference/update/UpdateInferenceResponse.ts#L22-L24" + }, { "kind": "interface", "description": "A list of tools that the model can call.", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index fb2229f057..2f6dd416ec 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13017,6 +13017,14 @@ export interface InferenceStreamInferenceResponse { >>>>>>> dc0cd555b (Add inference.stream_inference) } +export interface InferenceUpdateRequest extends RequestBase { + inference_id: Id + task_type?: InferenceTaskType + body?: InferenceInferenceEndpoint +} + +export type InferenceUpdateResponse = InferenceInferenceEndpointInfo + export interface IngestAppendProcessor extends IngestProcessorBase { field: Field value: any | any[] diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index c8c6eae7bf..5db38249b3 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -273,6 +273,7 @@ indices-templates,https://www.elastic.co/guide/en/elasticsearch/reference/{branc indices-update-settings,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/indices-update-settings.html infer-trained-model-deployment,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-trained-model-deployment.html inference-api-stream,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/stream-inference-api.html +inference-api-update,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/update-inference-api.html inference-processor,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/inference-processor.html info-api,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/info-api.html ingest,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/ingest.html diff --git a/specification/_json_spec/inference.update.json b/specification/_json_spec/inference.update.json new file mode 100644 index 0000000000..6c458ce080 --- /dev/null +++ b/specification/_json_spec/inference.update.json @@ -0,0 +1,45 @@ +{ + "inference.update": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/update-inference-api.html", + "description": "Update inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{inference_id}/_update", + "methods": ["POST"], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + }, + { + "path": "/_inference/{task_type}/{inference_id}/_update", + "methods": ["POST"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/specification/inference/update/UpdateInferenceRequest.ts b/specification/inference/update/UpdateInferenceRequest.ts new file mode 100644 index 0000000000..ecaf6dbc3a --- /dev/null +++ b/specification/inference/update/UpdateInferenceRequest.ts @@ -0,0 +1,51 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpoint } from '@inference/_types/Services' +import { TaskType } from '@inference/_types/TaskType' +import { RequestBase } from '@_types/Base' +import { Id } from '@_types/common' + +/** + * Update an inference endpoint. + * + * Modify `task_settings`, secrets (within `service_settings`), or `num_allocations` for an inference endpoint, depending on the specific endpoint service and `task_type`. + * + * IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. + * For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. + * However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs. + * @rest_spec_name inference.update + * @availability stack since=8.17.0 stability=stable visibility=public + * @cluster_privileges manage_inference + * @doc_id inference-api-update + */ +export interface Request extends RequestBase { + path_parts: { + /** + * The unique identifier of the inference endpoint. + */ + inference_id: Id + /** + * The type of inference task that the model performs. + */ + task_type?: TaskType + } + /** @codegen_name inference_config */ + body: InferenceEndpoint +} diff --git a/specification/inference/update/UpdateInferenceResponse.ts b/specification/inference/update/UpdateInferenceResponse.ts new file mode 100644 index 0000000000..d40639b031 --- /dev/null +++ b/specification/inference/update/UpdateInferenceResponse.ts @@ -0,0 +1,24 @@ +/* + * Licensed to Elasticsearch B.V. under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import { InferenceEndpointInfo } from '@inference/_types/Services' + +export class Response { + body: InferenceEndpointInfo +} From 644354e5a08652e96b346ab809d3743abc85c4d8 Mon Sep 17 00:00:00 2001 From: lcawl Date: Tue, 7 Jan 2025 08:17:14 -0800 Subject: [PATCH 3/7] Add array of strings to stream inference input --- output/openapi/elasticsearch-openapi.json | 12 ++++++++- output/schema/schema.json | 25 +++++++++++++++---- output/typescript/types.ts | 2 +- .../StreamInferenceRequest.ts | 2 +- 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index d9f587e45c..b1d3c783d7 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -107410,7 +107410,17 @@ ======= "input": { "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", - "type": "string" + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] } }, "required": [ diff --git a/output/schema/schema.json b/output/schema/schema.json index 5104fa7853..2497027680 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -144249,11 +144249,26 @@ "name": "input", "required": true, "type": { - "kind": "instance_of", - "type": { - "name": "string", - "namespace": "_builtins" - } + "kind": "union_of", + "items": [ + { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + }, + { + "kind": "array_of", + "value": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ] } } ] diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 2f6dd416ec..b38a858f59 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13009,7 +13009,7 @@ export interface InferenceStreamInferenceRequest extends RequestBase { inference_id: Id task_type?: InferenceTaskType body?: { - input: string + input: string | string[] } } diff --git a/specification/inference/stream_inference/StreamInferenceRequest.ts b/specification/inference/stream_inference/StreamInferenceRequest.ts index 764496c861..92d26e5b96 100644 --- a/specification/inference/stream_inference/StreamInferenceRequest.ts +++ b/specification/inference/stream_inference/StreamInferenceRequest.ts @@ -52,6 +52,6 @@ export interface Request extends RequestBase { * * NOTE: Inference endpoints for the completion task type currently only support a single string as input. */ - input: string + input: string | string[] } } From 6ae51983450525d0d7576789b9de358148cf2d67 Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Mon, 13 Jan 2025 13:10:57 +0100 Subject: [PATCH 4/7] update inference response with streamresult type --- output/openapi/elasticsearch-openapi.json | 187 ++++++++-------- output/schema/schema.json | 210 +++++++++--------- output/schema/validation-errors.json | 26 --- output/typescript/types.ts | 22 +- .../StreamInferenceResponse.ts | 4 +- 5 files changed, 219 insertions(+), 230 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index b1d3c783d7..322e4033ff 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -15985,43 +15985,42 @@ "x-state": "Added in 8.11.0" } }, -<<<<<<< HEAD - "/_inference/{inference_id}/_unified": { -======= "/_inference/{inference_id}/_stream": { ->>>>>>> dc0cd555b (Add inference.stream_inference) "post": { "tags": [ "inference" ], -<<<<<<< HEAD - "summary": "Perform inference on the service using the Unified Schema", - "operationId": "inference-unified-inference", + "summary": "Perform streaming inference", + "description": "Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.", + "operationId": "inference-stream-inference", "parameters": [ { - "$ref": "#/components/parameters/inference.unified_inference#inference_id" - }, - { - "$ref": "#/components/parameters/inference.unified_inference#timeout" + "$ref": "#/components/parameters/inference.stream_inference#inference_id" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.unified_inference" + "$ref": "#/components/requestBodies/inference.stream_inference" }, "responses": { "200": { - "$ref": "#/components/responses/inference.unified_inference#200" + "$ref": "#/components/responses/inference.stream_inference#200" } }, - "x-state": "Added in 8.18.0" + "x-state": "Added in 8.16.0" } }, - "/_inference/{task_type}/{inference_id}/_unified": { -======= + "/_inference/{task_type}/{inference_id}/_stream": { + "post": { + "tags": [ + "inference" + ], "summary": "Perform streaming inference", "description": "Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.", - "operationId": "inference-stream-inference", + "operationId": "inference-stream-inference-1", "parameters": [ + { + "$ref": "#/components/parameters/inference.stream_inference#task_type" + }, { "$ref": "#/components/parameters/inference.stream_inference#inference_id" } @@ -16037,19 +16036,14 @@ "x-state": "Added in 8.16.0" } }, - "/_inference/{task_type}/{inference_id}/_stream": { ->>>>>>> dc0cd555b (Add inference.stream_inference) + "/_inference/{inference_id}/_unified": { "post": { "tags": [ "inference" ], -<<<<<<< HEAD "summary": "Perform inference on the service using the Unified Schema", - "operationId": "inference-unified-inference-1", + "operationId": "inference-unified-inference", "parameters": [ - { - "$ref": "#/components/parameters/inference.unified_inference#task_type" - }, { "$ref": "#/components/parameters/inference.unified_inference#inference_id" }, @@ -16066,28 +16060,35 @@ } }, "x-state": "Added in 8.18.0" -======= - "summary": "Perform streaming inference", - "description": "Get real-time responses for completion tasks by delivering answers incrementally, reducing response times during computation.\nThis API works only with the completion task type.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nThis API requires the `monitor_inference` cluster privilege (the built-in `inference_admin` and `inference_user` roles grant this privilege). You must use a client that supports streaming.", - "operationId": "inference-stream-inference-1", + } + }, + "/_inference/{task_type}/{inference_id}/_unified": { + "post": { + "tags": [ + "inference" + ], + "summary": "Perform inference on the service using the Unified Schema", + "operationId": "inference-unified-inference-1", "parameters": [ { - "$ref": "#/components/parameters/inference.stream_inference#task_type" + "$ref": "#/components/parameters/inference.unified_inference#task_type" }, { - "$ref": "#/components/parameters/inference.stream_inference#inference_id" + "$ref": "#/components/parameters/inference.unified_inference#inference_id" + }, + { + "$ref": "#/components/parameters/inference.unified_inference#timeout" } ], "requestBody": { - "$ref": "#/components/requestBodies/inference.stream_inference" + "$ref": "#/components/requestBodies/inference.unified_inference" }, "responses": { "200": { - "$ref": "#/components/responses/inference.stream_inference#200" + "$ref": "#/components/responses/inference.unified_inference#200" } }, - "x-state": "Added in 8.16.0" ->>>>>>> dc0cd555b (Add inference.stream_inference) + "x-state": "Added in 8.18.0" } }, "/_inference/{inference_id}/_update": { @@ -72753,6 +72754,9 @@ "score" ] }, + "_types:StreamResult": { + "type": "object" + }, "inference.unified_inference:Message": { "type": "object", "properties": { @@ -72923,9 +72927,6 @@ "name" ] }, - "_types:StreamResult": { - "type": "object" - }, "_types:ElasticsearchVersionInfo": { "type": "object", "properties": { @@ -94279,20 +94280,22 @@ } } }, -<<<<<<< HEAD - "inference.unified_inference#200": { -======= "inference.stream_inference#200": { ->>>>>>> dc0cd555b (Add inference.stream_inference) "description": "", "content": { "application/json": { "schema": { -<<<<<<< HEAD "$ref": "#/components/schemas/_types:StreamResult" -======= - "type": "object" ->>>>>>> dc0cd555b (Add inference.stream_inference) + } + } + } + }, + "inference.unified_inference#200": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/_types:StreamResult" } } } @@ -101746,7 +101749,28 @@ }, "style": "simple" }, -<<<<<<< HEAD + "inference.stream_inference#inference_id": { + "in": "path", + "name": "inference_id", + "description": "The unique identifier for the inference endpoint.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/_types:Id" + }, + "style": "simple" + }, + "inference.stream_inference#task_type": { + "in": "path", + "name": "task_type", + "description": "The type of task that the model performs.", + "required": true, + "deprecated": false, + "schema": { + "$ref": "#/components/schemas/inference._types:TaskType" + }, + "style": "simple" + }, "inference.unified_inference#task_type": { "in": "path", "name": "task_type", @@ -101762,12 +101786,6 @@ "in": "path", "name": "inference_id", "description": "The inference Id", -======= - "inference.stream_inference#inference_id": { - "in": "path", - "name": "inference_id", - "description": "The unique identifier for the inference endpoint.", ->>>>>>> dc0cd555b (Add inference.stream_inference) "required": true, "deprecated": false, "schema": { @@ -101775,7 +101793,6 @@ }, "style": "simple" }, -<<<<<<< HEAD "inference.unified_inference#timeout": { "in": "query", "name": "timeout", @@ -101785,18 +101802,6 @@ "$ref": "#/components/schemas/_types:Duration" }, "style": "form" -======= - "inference.stream_inference#task_type": { - "in": "path", - "name": "task_type", - "description": "The type of task that the model performs.", - "required": true, - "deprecated": false, - "schema": { - "$ref": "#/components/schemas/inference._types:TaskType" - }, - "style": "simple" ->>>>>>> dc0cd555b (Add inference.stream_inference) }, "inference.update#inference_id": { "in": "path", @@ -107353,17 +107358,40 @@ }, "required": true }, -<<<<<<< HEAD - "inference.unified_inference": { -======= "inference.stream_inference": { ->>>>>>> dc0cd555b (Add inference.stream_inference) "content": { "application/json": { "schema": { "type": "object", "properties": { -<<<<<<< HEAD + "input": { + "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", + "oneOf": [ + { + "type": "string" + }, + { + "type": "array", + "items": { + "type": "string" + } + } + ] + } + }, + "required": [ + "input" + ] + } + } + } + }, + "inference.unified_inference": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { "messages": { "description": "A list of objects representing the conversation.", "type": "array", @@ -107407,25 +107435,6 @@ }, "required": [ "messages" -======= - "input": { - "description": "The text on which you want to perform the inference task.\nIt can be a single string or an array.\n\nNOTE: Inference endpoints for the completion task type currently only support a single string as input.", - "oneOf": [ - { - "type": "string" - }, - { - "type": "array", - "items": { - "type": "string" - } - } - ] - } - }, - "required": [ - "input" ->>>>>>> dc0cd555b (Add inference.stream_inference) ] } } diff --git a/output/schema/schema.json b/output/schema/schema.json index 2497027680..fb2393b9ad 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -8863,93 +8863,93 @@ }, { "availability": { + "serverless": { + "stability": "stable", + "visibility": "public" + }, "stack": { - "since": "8.17.0", + "since": "8.18.0", "stability": "stable", "visibility": "public" } }, - "description": "Update an inference endpoint.\n\nModify `task_settings`, secrets (within `service_settings`), or `num_allocations` for an inference endpoint, depending on the specific endpoint service and `task_type`.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "docId": "inference-api-update", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/update-inference-api.html", - "name": "inference.update", - "privileges": { - "cluster": [ - "manage_inference" - ] - }, + "description": "Perform inference on the service using the Unified Schema", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html", + "name": "inference.unified_inference", "request": { "name": "Request", - "namespace": "inference.update" + "namespace": "inference.unified_inference" }, - "requestBodyRequired": true, + "requestBodyRequired": false, "requestMediaType": [ "application/json" ], "response": { "name": "Response", - "namespace": "inference.update" + "namespace": "inference.unified_inference" }, "responseMediaType": [ - "application/json" + "text/event-stream" ], "urls": [ { "methods": [ "POST" ], - "path": "/_inference/{inference_id}/_update" + "path": "/_inference/{inference_id}/_unified" }, { "methods": [ "POST" ], - "path": "/_inference/{task_type}/{inference_id}/_update" + "path": "/_inference/{task_type}/{inference_id}/_unified" } ] }, { "availability": { - "serverless": { - "stability": "stable", - "visibility": "public" - }, "stack": { - "since": "8.18.0", + "since": "8.17.0", "stability": "stable", "visibility": "public" } }, - "description": "Perform inference on the service using the Unified Schema", - "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html", - "name": "inference.unified_inference", + "description": "Update an inference endpoint.\n\nModify `task_settings`, secrets (within `service_settings`), or `num_allocations` for an inference endpoint, depending on the specific endpoint service and `task_type`.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "docId": "inference-api-update", + "docUrl": "https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/update-inference-api.html", + "name": "inference.update", + "privileges": { + "cluster": [ + "manage_inference" + ] + }, "request": { "name": "Request", - "namespace": "inference.unified_inference" + "namespace": "inference.update" }, - "requestBodyRequired": false, + "requestBodyRequired": true, "requestMediaType": [ "application/json" ], "response": { "name": "Response", - "namespace": "inference.unified_inference" + "namespace": "inference.update" }, "responseMediaType": [ - "text/event-stream" + "application/json" ], "urls": [ { "methods": [ "POST" ], - "path": "/_inference/{inference_id}/_unified" + "path": "/_inference/{inference_id}/_update" }, { "methods": [ "POST" ], - "path": "/_inference/{task_type}/{inference_id}/_unified" + "path": "/_inference/{task_type}/{inference_id}/_update" } ] }, @@ -144313,74 +144313,6 @@ "query": [], "specLocation": "inference/stream_inference/StreamInferenceRequest.ts#L24-L57" }, - { - "kind": "response", - "body": { - "kind": "properties", - "properties": [] - }, - "name": { - "name": "Response", - "namespace": "inference.stream_inference" - }, - "specLocation": "inference/stream_inference/StreamInferenceResponse.ts#L20-L22" - }, - { - "kind": "request", - "attachedBehaviors": [ - "CommonQueryParameters" - ], - "body": { - "kind": "value", - "codegenName": "inference_config", - "value": { - "kind": "instance_of", - "type": { - "name": "InferenceEndpoint", - "namespace": "inference._types" - } - } - }, - "description": "Update an inference endpoint.\n\nModify `task_settings`, secrets (within `service_settings`), or `num_allocations` for an inference endpoint, depending on the specific endpoint service and `task_type`.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", - "inherits": { - "type": { - "name": "RequestBase", - "namespace": "_types" - } - }, - "name": { - "name": "Request", - "namespace": "inference.update" - }, - "path": [ - { - "description": "The unique identifier of the inference endpoint.", - "name": "inference_id", - "required": true, - "type": { - "kind": "instance_of", - "type": { - "name": "Id", - "namespace": "_types" - } - } - }, - { - "description": "The type of inference task that the model performs.", - "name": "task_type", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "TaskType", - "namespace": "inference._types" - } - } - } - ], - "query": [], - "specLocation": "inference/update/UpdateInferenceRequest.ts#L25-L51" - }, { "kind": "response", "body": { @@ -144388,16 +144320,16 @@ "value": { "kind": "instance_of", "type": { - "name": "InferenceEndpointInfo", - "namespace": "inference._types" + "name": "StreamResult", + "namespace": "_types" } } }, "name": { "name": "Response", - "namespace": "inference.update" + "namespace": "inference.stream_inference" }, - "specLocation": "inference/update/UpdateInferenceResponse.ts#L22-L24" + "specLocation": "inference/stream_inference/StreamInferenceResponse.ts#L22-L24" }, { "kind": "interface", @@ -144978,6 +144910,80 @@ ], "specLocation": "inference/unified_inference/UnifiedRequest.ts#L106-L118" }, + { + "kind": "request", + "attachedBehaviors": [ + "CommonQueryParameters" + ], + "body": { + "kind": "value", + "codegenName": "inference_config", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpoint", + "namespace": "inference._types" + } + } + }, + "description": "Update an inference endpoint.\n\nModify `task_settings`, secrets (within `service_settings`), or `num_allocations` for an inference endpoint, depending on the specific endpoint service and `task_type`.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models.\nHowever, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.", + "inherits": { + "type": { + "name": "RequestBase", + "namespace": "_types" + } + }, + "name": { + "name": "Request", + "namespace": "inference.update" + }, + "path": [ + { + "description": "The unique identifier of the inference endpoint.", + "name": "inference_id", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "Id", + "namespace": "_types" + } + } + }, + { + "description": "The type of inference task that the model performs.", + "name": "task_type", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TaskType", + "namespace": "inference._types" + } + } + } + ], + "query": [], + "specLocation": "inference/update/UpdateInferenceRequest.ts#L25-L51" + }, + { + "kind": "response", + "body": { + "kind": "value", + "value": { + "kind": "instance_of", + "type": { + "name": "InferenceEndpointInfo", + "namespace": "inference._types" + } + } + }, + "name": { + "name": "Response", + "namespace": "inference.update" + }, + "specLocation": "inference/update/UpdateInferenceResponse.ts#L22-L24" + }, { "kind": "interface", "inherits": { diff --git a/output/schema/validation-errors.json b/output/schema/validation-errors.json index 3ff0079c44..2e2edb86de 100644 --- a/output/schema/validation-errors.json +++ b/output/schema/validation-errors.json @@ -259,32 +259,6 @@ ], "response": [] }, -<<<<<<< HEAD - "inference.stream_inference": { - "request": [ - "Missing request & response" -======= - "ingest.delete_geoip_database": { - "request": [ - "Request: query parameter 'master_timeout' does not exist in the json spec", - "Request: query parameter 'timeout' does not exist in the json spec" - ], - "response": [] - }, - "ingest.delete_ip_location_database": { - "request": [ - "Request: query parameter 'master_timeout' does not exist in the json spec", - "Request: query parameter 'timeout' does not exist in the json spec" - ], - "response": [] - }, - "ingest.get_geoip_database": { - "request": [ - "Request: query parameter 'master_timeout' does not exist in the json spec" ->>>>>>> dc0cd555b (Add inference.stream_inference) - ], - "response": [] - }, "ingest.get_ip_location_database": { "request": [ "Request: query parameter 'master_timeout' does not exist in the json spec" diff --git a/output/typescript/types.ts b/output/typescript/types.ts index b38a858f59..fe049c6636 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -12939,7 +12939,16 @@ export interface InferencePutRequest extends RequestBase { export type InferencePutResponse = InferenceInferenceEndpointInfo -<<<<<<< HEAD +export interface InferenceStreamInferenceRequest extends RequestBase { + inference_id: Id + task_type?: InferenceTaskType + body?: { + input: string | string[] + } +} + +export type InferenceStreamInferenceResponse = StreamResult + export interface InferenceUnifiedInferenceCompletionTool { type: string function: InferenceUnifiedInferenceCompletionToolFunction @@ -13004,17 +13013,6 @@ export interface InferenceUnifiedInferenceToolCall { export interface InferenceUnifiedInferenceToolCallFunction { arguments: string name: string -======= -export interface InferenceStreamInferenceRequest extends RequestBase { - inference_id: Id - task_type?: InferenceTaskType - body?: { - input: string | string[] - } -} - -export interface InferenceStreamInferenceResponse { ->>>>>>> dc0cd555b (Add inference.stream_inference) } export interface InferenceUpdateRequest extends RequestBase { diff --git a/specification/inference/stream_inference/StreamInferenceResponse.ts b/specification/inference/stream_inference/StreamInferenceResponse.ts index 0769bf66cb..74b823bf40 100644 --- a/specification/inference/stream_inference/StreamInferenceResponse.ts +++ b/specification/inference/stream_inference/StreamInferenceResponse.ts @@ -17,6 +17,8 @@ * under the License. */ +import { StreamResult } from '@_types/Binary' + export class Response { - body: {} + body: StreamResult } From 63738244cd86b60c8d507696f6261e220c703afa Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Mon, 13 Jan 2025 13:13:49 +0100 Subject: [PATCH 5/7] fix overlay --- docs/overlays/elasticsearch-openapi-overlays.yaml | 14 +++++++++----- .../request}/StreamInferenceRequestExample1.yaml | 0 2 files changed, 9 insertions(+), 5 deletions(-) rename specification/inference/stream_inference/{ => examples/request}/StreamInferenceRequestExample1.yaml (100%) diff --git a/docs/overlays/elasticsearch-openapi-overlays.yaml b/docs/overlays/elasticsearch-openapi-overlays.yaml index 29291eb394..f35c84d174 100644 --- a/docs/overlays/elasticsearch-openapi-overlays.yaml +++ b/docs/overlays/elasticsearch-openapi-overlays.yaml @@ -572,8 +572,12 @@ actions: examples: updateWatcherSettingsRequestExample1: $ref: "../../specification/watcher/get_settings/examples/200_response/WatcherGetSettingsResponseExample1.yaml" - -======= - streamInferenceRequestExample1: - $ref: "../../specification/inference/stream_inference/StreamInferenceRequestExample1.yaml" ->>>>>>> dc0cd555b (Add inference.stream_inference) +## Examples for inference +- target: "$.components['requestBodies']['inference.stream_inference']" + description: "Add example for inference stream request" + update: + content: + application/json: + examples: + streamInferenceRequestExample1: + $ref: "../../specification/inference/stream_inference/example/request/StreamInferenceRequestExample1.yaml" diff --git a/specification/inference/stream_inference/StreamInferenceRequestExample1.yaml b/specification/inference/stream_inference/examples/request/StreamInferenceRequestExample1.yaml similarity index 100% rename from specification/inference/stream_inference/StreamInferenceRequestExample1.yaml rename to specification/inference/stream_inference/examples/request/StreamInferenceRequestExample1.yaml From a910fa9fbcd06c73eb03a924b2c0b5fa4df55af0 Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Mon, 13 Jan 2025 13:21:11 +0100 Subject: [PATCH 6/7] fix wrong rebase --- docs/overlays/elasticsearch-openapi-overlays.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/docs/overlays/elasticsearch-openapi-overlays.yaml b/docs/overlays/elasticsearch-openapi-overlays.yaml index f35c84d174..f233a9b3cc 100644 --- a/docs/overlays/elasticsearch-openapi-overlays.yaml +++ b/docs/overlays/elasticsearch-openapi-overlays.yaml @@ -394,7 +394,6 @@ actions: examples: indicesLegacyPutTemplateRequestExample1: $ref: "../../specification/indices/put_template/indicesPutTemplateRequestExample1.yaml" -<<<<<<< HEAD ## Examples for behavioral analytics - target: "$.paths['/_application/analytics/{collection_name}/event/{event_type}']['post']" description: "Add examples for post analytics collection event operation" @@ -419,15 +418,10 @@ actions: ## Examples for ingest - target: "$.components['requestBodies']['simulate.ingest']" description: "Add example for simulate ingest request" -======= - - target: "$.components['requestBodies']['inference.stream_inference']" - description: "Add example for inference stream request" ->>>>>>> dc0cd555b (Add inference.stream_inference) update: content: application/json: examples: -<<<<<<< HEAD simulateIngestRequestExample1: $ref: "../../specification/simulate/ingest/examples/request/SimulateIngestRequestExample1.yaml" simulateIngestRequestExample2: From 9d597223306b9f693f79ec9169acd1731db63c76 Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Mon, 13 Jan 2025 13:22:36 +0100 Subject: [PATCH 7/7] fix overlay wrong indent --- .../overlays/elasticsearch-openapi-overlays.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/overlays/elasticsearch-openapi-overlays.yaml b/docs/overlays/elasticsearch-openapi-overlays.yaml index f233a9b3cc..2523534f1c 100644 --- a/docs/overlays/elasticsearch-openapi-overlays.yaml +++ b/docs/overlays/elasticsearch-openapi-overlays.yaml @@ -567,11 +567,11 @@ actions: updateWatcherSettingsRequestExample1: $ref: "../../specification/watcher/get_settings/examples/200_response/WatcherGetSettingsResponseExample1.yaml" ## Examples for inference -- target: "$.components['requestBodies']['inference.stream_inference']" - description: "Add example for inference stream request" - update: - content: - application/json: - examples: - streamInferenceRequestExample1: - $ref: "../../specification/inference/stream_inference/example/request/StreamInferenceRequestExample1.yaml" + - target: "$.components['requestBodies']['inference.stream_inference']" + description: "Add example for inference stream request" + update: + content: + application/json: + examples: + streamInferenceRequestExample1: + $ref: "../../specification/inference/stream_inference/examples/request/StreamInferenceRequestExample1.yaml"