From 3eebfb07a7ffb5021c8e7f06a8a2de788db0cd3d Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Wed, 14 May 2025 15:59:55 +0300 Subject: [PATCH 1/6] Update Inference specification for Hugging Face's completion and chat completion tasks --- output/openapi/elasticsearch-openapi.json | 16 +- .../elasticsearch-serverless-openapi.json | 16 +- output/schema/schema-serverless.json | 82 ++++++---- output/schema/schema.json | 82 ++++++---- output/typescript/types.ts | 3 +- package-lock.json | 143 ++++++++---------- package.json | 2 +- specification/inference/_types/CommonTypes.ts | 13 +- .../chat_completion_unified/UnifiedRequest.ts | 2 +- .../PostChatCompletionRequestExample2.yaml | 2 +- .../PostChatCompletionRequestExample3.yaml | 2 +- .../put_hugging_face/PutHuggingFaceRequest.ts | 24 ++- 12 files changed, 225 insertions(+), 162 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 0a7dc57c4d..72ca232706 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17421,7 +17421,7 @@ "inference" ], "summary": "Perform chat completion inference\n", - "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "operationId": "inference-chat-completion-unified", "parameters": [ { @@ -17460,12 +17460,12 @@ }, "PostChatCompletionRequestExample2": { "summary": "A chat completion task with tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { "summary": "A chat completion task with tools and tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" } } @@ -18685,7 +18685,7 @@ "inference" ], "summary": "Create a Hugging Face inference endpoint", - "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Creates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "operationId": "inference-put-hugging-face", "parameters": [ { @@ -80324,6 +80324,8 @@ "inference._types.HuggingFaceTaskType": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding" ] }, @@ -80347,7 +80349,11 @@ "$ref": "#/components/schemas/inference._types.RateLimitSetting" }, "url": { - "description": "The URL endpoint to use for the requests.", + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, endpoint must be compatible with the OpenAI API format and include `v1/chat/completions`.", + "type": "string" + }, + "model_id": { + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field is not required and will be ignored if provided.", "type": "string" } }, diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 68fb89e0a5..c6d53ca930 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9350,7 +9350,7 @@ "inference" ], "summary": "Perform chat completion inference\n", - "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "operationId": "inference-chat-completion-unified", "parameters": [ { @@ -9389,12 +9389,12 @@ }, "PostChatCompletionRequestExample2": { "summary": "A chat completion task with tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { "summary": "A chat completion task with tools and tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" } } @@ -10614,7 +10614,7 @@ "inference" ], "summary": "Create a Hugging Face inference endpoint", - "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Creates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "operationId": "inference-put-hugging-face", "parameters": [ { @@ -51596,6 +51596,8 @@ "inference._types.HuggingFaceTaskType": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding" ] }, @@ -51619,7 +51621,11 @@ "$ref": "#/components/schemas/inference._types.RateLimitSetting" }, "url": { - "description": "The URL endpoint to use for the requests.", + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, endpoint must be compatible with the OpenAI API format and include `v1/chat/completions`.", + "type": "string" + }, + "model_id": { + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field is not required and will be ignored if provided.", "type": "string" } }, diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 1d6e0422e0..09e8fb1448 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4419,7 +4419,7 @@ "visibility": "public" } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "docId": "inference-api-chat-completion", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-unified-inference", "name": "inference.chat_completion_unified", @@ -5147,7 +5147,7 @@ "visibility": "public" } }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create a Hugging Face inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "docId": "inference-api-put-huggingface", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face", "name": "inference.put_hugging_face", @@ -27409,7 +27409,7 @@ } } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "examples": { "PostChatCompletionRequestExample1": { "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion on the example question with streaming.", @@ -27417,12 +27417,12 @@ "value": "{\n \"model\": \"gpt-4o\",\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": \"What is Elastic?\"\n }\n ]\n}" }, "PostChatCompletionRequestExample2": { - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "summary": "A chat completion task with tool_calls", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "summary": "A chat completion task with tools and tool_calls", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" } @@ -29364,7 +29364,7 @@ } ] }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create a Hugging Face inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "examples": { "PutHuggingFaceRequestExample1": { "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", @@ -29410,7 +29410,7 @@ } ], "query": [], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L85" + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L97" }, { "body": { @@ -105952,11 +105952,17 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L898-L900" + "specLocation": "inference/_types/CommonTypes.ts#L909-L911" }, { "kind": "enum", "members": [ + { + "name": "chat_completion" + }, + { + "name": "completion" + }, { "name": "text_embedding" } @@ -105965,7 +105971,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L894-L896" + "specLocation": "inference/_types/CommonTypes.ts#L903-L907" }, { "kind": "enum", @@ -105978,7 +105984,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L961-L963" + "specLocation": "inference/_types/CommonTypes.ts#L972-L974" }, { "kind": "enum", @@ -105997,7 +106003,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L965-L969" + "specLocation": "inference/_types/CommonTypes.ts#L976-L980" }, { "kind": "enum", @@ -106013,7 +106019,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L956-L959" + "specLocation": "inference/_types/CommonTypes.ts#L967-L970" }, { "kind": "enum", @@ -106035,7 +106041,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L971-L976" + "specLocation": "inference/_types/CommonTypes.ts#L982-L987" }, { "codegenNames": [ @@ -106117,7 +106123,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1011-L1013" + "specLocation": "inference/_types/CommonTypes.ts#L1022-L1024" }, { "kind": "enum", @@ -106130,7 +106136,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1007-L1009" + "specLocation": "inference/_types/CommonTypes.ts#L1018-L1020" }, { "kind": "enum", @@ -106143,7 +106149,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1073-L1075" + "specLocation": "inference/_types/CommonTypes.ts#L1084-L1086" }, { "kind": "enum", @@ -106162,7 +106168,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1067-L1071" + "specLocation": "inference/_types/CommonTypes.ts#L1078-L1082" }, { "kind": "type_alias", @@ -106249,7 +106255,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1141-L1143" + "specLocation": "inference/_types/CommonTypes.ts#L1152-L1154" }, { "kind": "enum", @@ -106265,7 +106271,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1136-L1139" + "specLocation": "inference/_types/CommonTypes.ts#L1147-L1150" }, { "kind": "enum", @@ -106278,7 +106284,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1188-L1190" + "specLocation": "inference/_types/CommonTypes.ts#L1199-L1201" }, { "kind": "enum", @@ -106291,7 +106297,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1184-L1186" + "specLocation": "inference/_types/CommonTypes.ts#L1195-L1197" }, { "kind": "enum", @@ -128487,7 +128493,7 @@ } }, { - "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.", + "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000 for all supported tasks.\nHugging Face does not publish a universal rate limit — actual limits may vary.\nIt is recommended to adjust this value based on the capacity and limits of your specific deployment environment.", "name": "rate_limit", "required": false, "type": { @@ -128499,7 +128505,7 @@ } }, { - "description": "The URL endpoint to use for the requests.", + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, endpoint must be compatible with the OpenAI API format and include `v1/chat/completions`.", "name": "url", "required": true, "type": { @@ -128509,9 +128515,21 @@ "namespace": "_builtins" } } + }, + { + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field is not required and will be ignored if provided.", + "name": "model_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } } ], - "specLocation": "inference/_types/CommonTypes.ts#L871-L892" + "specLocation": "inference/_types/CommonTypes.ts#L871-L901" }, { "kind": "interface", @@ -128573,7 +128591,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L902-L931" + "specLocation": "inference/_types/CommonTypes.ts#L913-L942" }, { "kind": "interface", @@ -128619,7 +128637,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L933-L954" + "specLocation": "inference/_types/CommonTypes.ts#L944-L965" }, { "kind": "interface", @@ -128681,7 +128699,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L978-L1005" + "specLocation": "inference/_types/CommonTypes.ts#L989-L1016" }, { "kind": "interface", @@ -128768,7 +128786,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1015-L1057" + "specLocation": "inference/_types/CommonTypes.ts#L1026-L1068" }, { "kind": "interface", @@ -128790,7 +128808,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1059-L1065" + "specLocation": "inference/_types/CommonTypes.ts#L1070-L1076" }, { "kind": "interface", @@ -128854,7 +128872,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1077-L1108" + "specLocation": "inference/_types/CommonTypes.ts#L1088-L1119" }, { "kind": "interface", @@ -128914,7 +128932,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1110-L1134" + "specLocation": "inference/_types/CommonTypes.ts#L1121-L1145" }, { "kind": "interface", @@ -129002,7 +129020,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1145-L1182" + "specLocation": "inference/_types/CommonTypes.ts#L1156-L1193" }, { "description": "Defines the response for a rerank request.", diff --git a/output/schema/schema.json b/output/schema/schema.json index dbd693b483..c1a2369c8b 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9209,7 +9209,7 @@ "visibility": "public" } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "docId": "inference-api-chat-completion", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-unified-inference", "name": "inference.chat_completion_unified", @@ -9937,7 +9937,7 @@ "visibility": "public" } }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create a Hugging Face inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "docId": "inference-api-put-huggingface", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face", "name": "inference.put_hugging_face", @@ -154279,7 +154279,7 @@ } }, { - "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.", + "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000 for all supported tasks.\nHugging Face does not publish a universal rate limit — actual limits may vary.\nIt is recommended to adjust this value based on the capacity and limits of your specific deployment environment.", "name": "rate_limit", "required": false, "type": { @@ -154291,7 +154291,7 @@ } }, { - "description": "The URL endpoint to use for the requests.", + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, endpoint must be compatible with the OpenAI API format and include `v1/chat/completions`.", "name": "url", "required": true, "type": { @@ -154301,9 +154301,21 @@ "namespace": "_builtins" } } + }, + { + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field is not required and will be ignored if provided.", + "name": "model_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } } ], - "specLocation": "inference/_types/CommonTypes.ts#L871-L892" + "specLocation": "inference/_types/CommonTypes.ts#L871-L901" }, { "kind": "enum", @@ -154316,11 +154328,17 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L898-L900" + "specLocation": "inference/_types/CommonTypes.ts#L909-L911" }, { "kind": "enum", "members": [ + { + "name": "chat_completion" + }, + { + "name": "completion" + }, { "name": "text_embedding" } @@ -154329,7 +154347,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L894-L896" + "specLocation": "inference/_types/CommonTypes.ts#L903-L907" }, { "kind": "interface", @@ -154652,7 +154670,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L902-L931" + "specLocation": "inference/_types/CommonTypes.ts#L913-L942" }, { "kind": "enum", @@ -154665,7 +154683,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L961-L963" + "specLocation": "inference/_types/CommonTypes.ts#L972-L974" }, { "kind": "enum", @@ -154684,7 +154702,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L965-L969" + "specLocation": "inference/_types/CommonTypes.ts#L976-L980" }, { "kind": "interface", @@ -154730,7 +154748,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L933-L954" + "specLocation": "inference/_types/CommonTypes.ts#L944-L965" }, { "kind": "enum", @@ -154746,7 +154764,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L956-L959" + "specLocation": "inference/_types/CommonTypes.ts#L967-L970" }, { "kind": "enum", @@ -154768,7 +154786,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L971-L976" + "specLocation": "inference/_types/CommonTypes.ts#L982-L987" }, { "kind": "interface", @@ -154926,7 +154944,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L978-L1005" + "specLocation": "inference/_types/CommonTypes.ts#L989-L1016" }, { "kind": "enum", @@ -154939,7 +154957,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1011-L1013" + "specLocation": "inference/_types/CommonTypes.ts#L1022-L1024" }, { "kind": "enum", @@ -154952,7 +154970,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1007-L1009" + "specLocation": "inference/_types/CommonTypes.ts#L1018-L1020" }, { "kind": "interface", @@ -155039,7 +155057,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1015-L1057" + "specLocation": "inference/_types/CommonTypes.ts#L1026-L1068" }, { "kind": "enum", @@ -155052,7 +155070,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1073-L1075" + "specLocation": "inference/_types/CommonTypes.ts#L1084-L1086" }, { "kind": "interface", @@ -155074,7 +155092,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1059-L1065" + "specLocation": "inference/_types/CommonTypes.ts#L1070-L1076" }, { "kind": "enum", @@ -155093,7 +155111,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1067-L1071" + "specLocation": "inference/_types/CommonTypes.ts#L1078-L1082" }, { "kind": "interface", @@ -155665,7 +155683,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1077-L1108" + "specLocation": "inference/_types/CommonTypes.ts#L1088-L1119" }, { "kind": "enum", @@ -155678,7 +155696,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1141-L1143" + "specLocation": "inference/_types/CommonTypes.ts#L1152-L1154" }, { "kind": "interface", @@ -155738,7 +155756,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1110-L1134" + "specLocation": "inference/_types/CommonTypes.ts#L1121-L1145" }, { "kind": "enum", @@ -155754,7 +155772,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1136-L1139" + "specLocation": "inference/_types/CommonTypes.ts#L1147-L1150" }, { "kind": "interface", @@ -155842,7 +155860,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1145-L1182" + "specLocation": "inference/_types/CommonTypes.ts#L1156-L1193" }, { "kind": "enum", @@ -155855,7 +155873,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1188-L1190" + "specLocation": "inference/_types/CommonTypes.ts#L1199-L1201" }, { "kind": "enum", @@ -155868,7 +155886,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1184-L1186" + "specLocation": "inference/_types/CommonTypes.ts#L1195-L1197" }, { "kind": "request", @@ -155886,7 +155904,7 @@ } } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "examples": { "PostChatCompletionRequestExample1": { "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion on the example question with streaming.", @@ -155894,12 +155912,12 @@ "value": "{\n \"model\": \"gpt-4o\",\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": \"What is Elastic?\"\n }\n ]\n}" }, "PostChatCompletionRequestExample2": { - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "summary": "A chat completion task with tool_calls", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "summary": "A chat completion task with tools and tool_calls", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" } @@ -157841,7 +157859,7 @@ } ] }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create a Hugging Face inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "examples": { "PutHuggingFaceRequestExample1": { "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", @@ -157886,7 +157904,7 @@ } ], "query": [], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L85" + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L97" }, { "kind": "response", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 2133511f08..a02f9c9541 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13500,11 +13500,12 @@ export interface InferenceHuggingFaceServiceSettings { api_key: string rate_limit?: InferenceRateLimitSetting url: string + model_id?: string } export type InferenceHuggingFaceServiceType = 'hugging_face' -export type InferenceHuggingFaceTaskType = 'text_embedding' +export type InferenceHuggingFaceTaskType = 'chat_completion' | 'completion' | 'text_embedding' export interface InferenceInferenceChunkingSettings { max_chunk_size?: integer diff --git a/package-lock.json b/package-lock.json index 407084b9a7..f0620fb430 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,7 +5,7 @@ "packages": { "": { "dependencies": { - "@redocly/cli": "^1.34.1", + "@redocly/cli": "^1.34.3", "@stoplight/spectral-cli": "^6.14.2" } }, @@ -19,36 +19,33 @@ } }, "node_modules/@babel/code-frame": { - "version": "7.26.2", - "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.26.2.tgz", - "integrity": "sha512-RJlIHRueQgwWitWgF8OdFYGZX328Ax5BCemNGlqHfplnRT9ESi8JkFlvaVYbS+UubVY6dpv87Fs2u5M29iNFVQ==", + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.27.1.tgz", + "integrity": "sha512-cjQ7ZlQ0Mv3b47hABuTevyTuYN4i+loJKGeV9flcCgIK37cCXRh+L1bd3iBHlynerhQ7BhCkn2BPbQUL+rGqFg==", "license": "MIT", "dependencies": { - "@babel/helper-validator-identifier": "^7.25.9", + "@babel/helper-validator-identifier": "^7.27.1", "js-tokens": "^4.0.0", - "picocolors": "^1.0.0" + "picocolors": "^1.1.1" }, "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/helper-validator-identifier": { - "version": "7.25.9", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz", - "integrity": "sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==", + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.27.1.tgz", + "integrity": "sha512-D2hP9eA+Sqx1kBZgzxZh0y1trbuU+JoDkiEwqhQ36nodYqJwyEIhPSdMNd7lOm/4io72luTPWH20Yda0xOuUow==", "license": "MIT", "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/runtime": { - "version": "7.26.10", - "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.26.10.tgz", - "integrity": "sha512-2WJMeRQPHKSPemqk/awGrAiuFfzBmOIPXKizAsVhWH9YJqLZ0H+HS4c8loHGgW6utJ3E/ejXQUsiGaQy2NZ9Fw==", + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.27.1.tgz", + "integrity": "sha512-1x3D2xEk2fRo3PAhwQwu5UubzgiVWSXTBfWpVd2Mx2AzRqJuDJCsgaDVZ7HB5iGzDW1Hl1sWN2mFyKjmR9uAog==", "license": "MIT", - "dependencies": { - "regenerator-runtime": "^0.14.0" - }, "engines": { "node": ">=6.9.0" } @@ -486,9 +483,9 @@ } }, "node_modules/@redocly/cli": { - "version": "1.34.1", - "resolved": "https://registry.npmjs.org/@redocly/cli/-/cli-1.34.1.tgz", - "integrity": "sha512-12aTw7A/0n+8T7yKM1E8qlFRFPZnm2i1me0sZ1WOAiGT4I2j4iUcCp+93B0nrjIs1ZdNmrT0TTrMYLhsMJYjaQ==", + "version": "1.34.3", + "resolved": "https://registry.npmjs.org/@redocly/cli/-/cli-1.34.3.tgz", + "integrity": "sha512-GJNBTMfm5wTCtH6K+RtPQZuGbqflMclXqAZ5My12tfux6xFDMW1l0MNd5RMpnIS1aeFcDX++P1gnnROWlesj4w==", "license": "MIT", "dependencies": { "@opentelemetry/api": "1.9.0", @@ -497,8 +494,8 @@ "@opentelemetry/sdk-trace-node": "1.26.0", "@opentelemetry/semantic-conventions": "1.27.0", "@redocly/config": "^0.22.0", - "@redocly/openapi-core": "1.34.1", - "@redocly/respect-core": "1.34.1", + "@redocly/openapi-core": "1.34.3", + "@redocly/respect-core": "1.34.3", "abort-controller": "^3.0.0", "chokidar": "^3.5.1", "colorette": "^1.2.0", @@ -512,7 +509,7 @@ "pluralize": "^8.0.0", "react": "^17.0.0 || ^18.2.0 || ^19.0.0", "react-dom": "^17.0.0 || ^18.2.0 || ^19.0.0", - "redoc": "2.4.0", + "redoc": "2.5.0", "semver": "^7.5.2", "simple-websocket": "^9.0.0", "styled-components": "^6.0.7", @@ -555,15 +552,15 @@ } }, "node_modules/@redocly/config": { - "version": "0.22.1", - "resolved": "https://registry.npmjs.org/@redocly/config/-/config-0.22.1.tgz", - "integrity": "sha512-1CqQfiG456v9ZgYBG9xRQHnpXjt8WoSnDwdkX6gxktuK69v2037hTAR1eh0DGIqpZ1p4k82cGH8yTNwt7/pI9g==", + "version": "0.22.2", + "resolved": "https://registry.npmjs.org/@redocly/config/-/config-0.22.2.tgz", + "integrity": "sha512-roRDai8/zr2S9YfmzUfNhKjOF0NdcOIqF7bhf4MVC5UxpjIysDjyudvlAiVbpPHp3eDRWbdzUgtkK1a7YiDNyQ==", "license": "MIT" }, "node_modules/@redocly/openapi-core": { - "version": "1.34.1", - "resolved": "https://registry.npmjs.org/@redocly/openapi-core/-/openapi-core-1.34.1.tgz", - "integrity": "sha512-KI1QOGvDk6oREbTu0JORxZX1NBxraXUbXczv0LYDs9EPp06coq874hQORqSHGEUV/DX2A6gjv4Ax33g/LFJBww==", + "version": "1.34.3", + "resolved": "https://registry.npmjs.org/@redocly/openapi-core/-/openapi-core-1.34.3.tgz", + "integrity": "sha512-3arRdUp1fNx55itnjKiUhO6t4Mf91TsrTIYINDNLAZPS0TPd5YpiXRctwjel0qqWoOOhjA34cZ3m4dksLDFUYg==", "license": "MIT", "dependencies": { "@redocly/ajv": "^8.11.2", @@ -603,14 +600,14 @@ } }, "node_modules/@redocly/respect-core": { - "version": "1.34.1", - "resolved": "https://registry.npmjs.org/@redocly/respect-core/-/respect-core-1.34.1.tgz", - "integrity": "sha512-Lzea25WqwxVK5+aCiq/pr7lUFdsZPYSqNzl05Z4jEtuP1DEIxJNG31ID75dZt30pPtyxjaa/dBuccruYlYflzw==", + "version": "1.34.3", + "resolved": "https://registry.npmjs.org/@redocly/respect-core/-/respect-core-1.34.3.tgz", + "integrity": "sha512-vo/gu7dRGwTVsRueVSjVk04jOQuL0w22RBJRdRUWkfyse791tYXgMCOx35ijKekL83Q/7Okxf/YX6UY1v5CAug==", "license": "MIT", "dependencies": { "@faker-js/faker": "^7.6.0", "@redocly/ajv": "8.11.2", - "@redocly/openapi-core": "1.34.1", + "@redocly/openapi-core": "1.34.3", "better-ajv-errors": "^1.2.0", "colorette": "^2.0.20", "concat-stream": "^2.0.0", @@ -1669,9 +1666,9 @@ } }, "node_modules/core-js": { - "version": "3.41.0", - "resolved": "https://registry.npmjs.org/core-js/-/core-js-3.41.0.tgz", - "integrity": "sha512-SJ4/EHwS36QMJd6h/Rg+GyR4A5xE0FSI3eZ+iBVpfqf1x0eTSg1smWLHrA+2jQThZSh97fmSgFSU8B61nxosxA==", + "version": "3.42.0", + "resolved": "https://registry.npmjs.org/core-js/-/core-js-3.42.0.tgz", + "integrity": "sha512-Sz4PP4ZA+Rq4II21qkNqOEDTDrCvcANId3xpIgB34NDkWc3UduWj2dqEtN9yZIq8Dk3HyPI33x9sqqU5C8sr0g==", "hasInstallScript": true, "license": "MIT", "funding": { @@ -1879,9 +1876,9 @@ } }, "node_modules/dompurify": { - "version": "3.2.4", - "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.2.4.tgz", - "integrity": "sha512-ysFSFEDVduQpyhzAob/kkuJjf5zWkZD8/A9ywSp1byueyuCfHamrCBa14/Oc2iiB0e51B+NpxSl5gmzn+Ms/mg==", + "version": "3.2.5", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.2.5.tgz", + "integrity": "sha512-mLPd29uoRe9HpvwP2TxClGQBzGXeEC/we/q+bFlmPPmj2p2Ugl3r6ATu/UU1v77DXNcehiBg9zsr1dREyA/dJQ==", "license": "(MPL-2.0 OR Apache-2.0)", "optionalDependencies": { "@types/trusted-types": "^2.0.7" @@ -3154,9 +3151,9 @@ } }, "node_modules/mobx": { - "version": "6.13.6", - "resolved": "https://registry.npmjs.org/mobx/-/mobx-6.13.6.tgz", - "integrity": "sha512-r19KNV0uBN4b+ER8Z0gA4y+MzDYIQ2SvOmn3fUrqPnWXdQfakd9yfbPBDBF/p5I+bd3N5Rk1fHONIvMay+bJGA==", + "version": "6.13.7", + "resolved": "https://registry.npmjs.org/mobx/-/mobx-6.13.7.tgz", + "integrity": "sha512-aChaVU/DO5aRPmk1GX8L+whocagUUpBQqoPtJk+cm7UOXUk87J4PeWCh6nNmTTIfEhiR9DI/+FnA8dln/hTK7g==", "license": "MIT", "funding": { "type": "opencollective", @@ -3220,9 +3217,9 @@ "license": "MIT" }, "node_modules/nanoid": { - "version": "3.3.9", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.9.tgz", - "integrity": "sha512-SppoicMGpZvbF1l3z4x7No3OlIjP7QJvC9XR7AhZr1kL133KHnKPztkKDc+Ir4aJ/1VhTySrtKhrsycmrMQfvg==", + "version": "3.3.11", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.11.tgz", + "integrity": "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==", "funding": [ { "type": "github", @@ -3445,9 +3442,9 @@ } }, "node_modules/open": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/open/-/open-10.1.0.tgz", - "integrity": "sha512-mnkeQ1qP5Ue2wd+aivTD3NHd/lZ96Lu0jgf0pwktLPtx6cTZiH7tyeGRRHs0zX0rbrahXPnXlUnbeXyaBBuIaw==", + "version": "10.1.2", + "resolved": "https://registry.npmjs.org/open/-/open-10.1.2.tgz", + "integrity": "sha512-cxN6aIDPz6rm8hbebcP7vrQNhvRcveZoJU72Y7vskh4oIm+BZwBECnx5nTmrlres1Qapvx27Qo1Auukpf8PKXw==", "license": "MIT", "dependencies": { "default-browser": "^5.2.1", @@ -3704,24 +3701,24 @@ } }, "node_modules/react": { - "version": "19.0.0", - "resolved": "https://registry.npmjs.org/react/-/react-19.0.0.tgz", - "integrity": "sha512-V8AVnmPIICiWpGfm6GLzCR/W5FXLchHop40W4nXBmdlEceh16rCN8O8LNWm5bh5XUX91fh7KpA+W0TgMKmgTpQ==", + "version": "19.1.0", + "resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz", + "integrity": "sha512-FS+XFBNvn3GTAWq26joslQgWNoFu08F4kl0J4CgdNKADkdSGXQyTCnKteIAJy96Br6YbpEU1LSzV5dYtjMkMDg==", "license": "MIT", "engines": { "node": ">=0.10.0" } }, "node_modules/react-dom": { - "version": "19.0.0", - "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.0.0.tgz", - "integrity": "sha512-4GV5sHFG0e/0AD4X+ySy6UJd3jVl1iNsNHdpad0qhABJ11twS3TTBnseqsKurKcsNqCEFeGL3uLpVChpIO3QfQ==", + "version": "19.1.0", + "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.1.0.tgz", + "integrity": "sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==", "license": "MIT", "dependencies": { - "scheduler": "^0.25.0" + "scheduler": "^0.26.0" }, "peerDependencies": { - "react": "^19.0.0" + "react": "^19.1.0" } }, "node_modules/react-is": { @@ -3770,15 +3767,15 @@ } }, "node_modules/redoc": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/redoc/-/redoc-2.4.0.tgz", - "integrity": "sha512-rFlfzFVWS9XJ6aYAs/bHnLhHP5FQEhwAHDBVgwb9L2FqDQ8Hu8rQ1G84iwaWXxZfPP9UWn7JdWkxI6MXr2ZDjw==", + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/redoc/-/redoc-2.5.0.tgz", + "integrity": "sha512-NpYsOZ1PD9qFdjbLVBZJWptqE+4Y6TkUuvEOqPUmoH7AKOmPcE+hYjotLxQNTqVoWL4z0T2uxILmcc8JGDci+Q==", "license": "MIT", "dependencies": { "@redocly/openapi-core": "^1.4.0", "classnames": "^2.3.2", "decko": "^1.2.0", - "dompurify": "^3.0.6", + "dompurify": "^3.2.4", "eventemitter3": "^5.0.1", "json-pointer": "^0.6.2", "lunr": "^2.3.9", @@ -3818,12 +3815,6 @@ "url": "https://github.com/Mermade/oas-kit?sponsor=1" } }, - "node_modules/regenerator-runtime": { - "version": "0.14.1", - "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz", - "integrity": "sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw==", - "license": "MIT" - }, "node_modules/regexp.prototype.flags": { "version": "1.5.2", "resolved": "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.5.2.tgz", @@ -3998,9 +3989,9 @@ "integrity": "sha512-ERq4hUjKDbJfE4+XtZLFPCDi8Vb1JqaxAPTxWFLBx8XcAlf9Bda/ZJdVezs/NAfsMQScyIlUMx+Yeu7P7rx5jw==" }, "node_modules/scheduler": { - "version": "0.25.0", - "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.25.0.tgz", - "integrity": "sha512-xFVuu11jh+xcO7JOAGJNOXld8/TcEHK/4CituBUeUb5hqxJLj9YuemAEuvm9gQ/+pgXYfbQuqAkiYu+u7YEsNA==", + "version": "0.26.0", + "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.26.0.tgz", + "integrity": "sha512-NlHwttCI/l5gCPR3D1nNXtWABUmBwvZpEQiD4IXSbIDq8BzLIK/7Ir5gTFSGZDUu37K5cMNp0hFtzO38sC7gWA==", "license": "MIT" }, "node_modules/semver": { @@ -4305,9 +4296,9 @@ "license": "MIT" }, "node_modules/styled-components": { - "version": "6.1.15", - "resolved": "https://registry.npmjs.org/styled-components/-/styled-components-6.1.15.tgz", - "integrity": "sha512-PpOTEztW87Ua2xbmLa7yssjNyUF9vE7wdldRfn1I2E6RTkqknkBYpj771OxM/xrvRGinLy2oysa7GOd7NcZZIA==", + "version": "6.1.18", + "resolved": "https://registry.npmjs.org/styled-components/-/styled-components-6.1.18.tgz", + "integrity": "sha512-Mvf3gJFzZCkhjY2Y/Fx9z1m3dxbza0uI9H1CbNZm/jSHCojzJhQ0R7bByrlFJINnMzz/gPulpoFFGymNwrsMcw==", "license": "MIT", "dependencies": { "@emotion/is-prop-valid": "1.2.2", @@ -4526,9 +4517,9 @@ } }, "node_modules/undici": { - "version": "6.21.2", - "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.2.tgz", - "integrity": "sha512-uROZWze0R0itiAKVPsYhFov9LxrPMHLMEQFszeI2gCN6bnIIZ8twzBCJcN2LJrBBLfrP0t1FW0g+JmKVl8Vk1g==", + "version": "6.21.3", + "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.3.tgz", + "integrity": "sha512-gBLkYIlEnSp8pFbT64yFgGE6UIB9tAkhukC23PmMDCe5Nd+cRqKxSjw5y54MK2AZMgZfJWMaNE4nYUHgi1XEOw==", "license": "MIT", "engines": { "node": ">=18.17" @@ -4566,9 +4557,9 @@ "license": "BSD" }, "node_modules/use-sync-external-store": { - "version": "1.4.0", - "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.4.0.tgz", - "integrity": "sha512-9WXSPC5fMv61vaupRkCKCxsPxBocVnwakBEkMIHHpkTTg6icbJtg6jzgtLDm4bl3cSHAca52rYWih0k4K3PfHw==", + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.5.0.tgz", + "integrity": "sha512-Rb46I4cGGVBmjamjphe8L/UnvJD+uPPtTkNvX5mZgqdbavhI4EbgIWJiIHXJ8bc/i9EQGPRh4DwEURJ552Do0A==", "license": "MIT", "peerDependencies": { "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" diff --git a/package.json b/package.json index 65aafaa259..3284efbed5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "dependencies": { - "@redocly/cli": "^1.34.1", + "@redocly/cli": "^1.34.3", "@stoplight/spectral-cli": "^6.14.2" } } diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index d99ff4b33c..d95bfc8f39 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -882,16 +882,27 @@ export class HuggingFaceServiceSettings { api_key: string /** * This setting helps to minimize the number of rate limit errors returned from Hugging Face. - * By default, the `hugging_face` service sets the number of requests allowed per minute to 3000. + * By default, the `hugging_face` service sets the number of requests allowed per minute to 3000 for all supported tasks. + * Hugging Face does not publish a universal rate limit — actual limits may vary. + * It is recommended to adjust this value based on the capacity and limits of your specific deployment environment. */ rate_limit?: RateLimitSetting /** * The URL endpoint to use for the requests. + * For `completion` and `chat_completion` tasks, endpoint must be compatible with the OpenAI API format and include `v1/chat/completions`. */ url: string + /** + * The name of the HuggingFace model to use for the inference task. + * For `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints. + * For the `text_embedding` task, this field is not required and will be ignored if provided. + */ + model_id?: string } export enum HuggingFaceTaskType { + chat_completion, + completion, text_embedding } diff --git a/specification/inference/chat_completion_unified/UnifiedRequest.ts b/specification/inference/chat_completion_unified/UnifiedRequest.ts index 3a5498defd..9dbe94747f 100644 --- a/specification/inference/chat_completion_unified/UnifiedRequest.ts +++ b/specification/inference/chat_completion_unified/UnifiedRequest.ts @@ -33,7 +33,7 @@ import { Duration } from '@_types/Time' * NOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming. * The Chat completion inference API and the Stream inference API differ in their response structure and capabilities. * The Chat completion inference API provides more comprehensive customization options through more fields and function calling support. - * If you use the `openai` service or the `elastic` service, use the Chat completion inference API. + * If you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API. * @rest_spec_name inference.chat_completion_unified * @availability stack since=8.18.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public diff --git a/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample2.yaml b/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample2.yaml index bc960fd976..20637448ba 100644 --- a/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample2.yaml +++ b/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample2.yaml @@ -1,5 +1,5 @@ summary: A chat completion task with tool_calls -description: Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`. +description: Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`. # method_request: "POST _inference/chat_completion/openai-completion/_stream" # type: "request" value: |- diff --git a/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample3.yaml b/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample3.yaml index 96dd01351e..ae14146827 100644 --- a/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample3.yaml +++ b/specification/inference/chat_completion_unified/examples/request/PostChatCompletionRequestExample3.yaml @@ -1,5 +1,5 @@ summary: A chat completion task with tools and tool_calls -description: Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`. +description: Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`. # method_request: "POST _inference/chat_completion/openai-completion/_stream" # type: "request" value: |- diff --git a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts index 8229d3c32e..593317cb90 100644 --- a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts +++ b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts @@ -29,13 +29,16 @@ import { Id } from '@_types/common' /** * Create a Hugging Face inference endpoint. * - * Create an inference endpoint to perform an inference task with the `hugging_face` service. + * Creates an inference endpoint to perform an inference task with the `hugging_face` service. + * Supported tasks include: `text_embedding`, `completion`, and `chat_completion`. * - * You must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL. - * Select the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section. - * Create the endpoint and copy the URL after the endpoint initialization has been finished. - * - * The following models are recommended for the Hugging Face service: + * To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint. + * Select a model that supports the task you intend to use. + + * For Elastic's `text_embedding` task: + * The selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section. + * After the endpoint has initialized, copy the generated endpoint URL. + * Recommended models for `text_embedding` task: * * * `all-MiniLM-L6-v2` * * `all-MiniLM-L12-v2` @@ -44,6 +47,15 @@ import { Id } from '@_types/common' * * `e5-small-v2` * * `multilingual-e5-base` * * `multilingual-e5-small` + * + * For Elastic's `chat_completion` and `completion` tasks: + * The selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task. + * After the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use. + * Recommended models for `chat_completion` and `completion` tasks: + * + * * `Mistral-7B-Instruct-v0.2` + * * `QwQ-32B` + * * `Phi-3-mini-128k-instruct` * @rest_spec_name inference.put_hugging_face * @availability stack since=8.12.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public From d8df82556426ce5c47778968ef8c9a4b3e0e3682 Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Thu, 29 May 2025 13:04:57 +0000 Subject: [PATCH 2/6] Extend description for url parameter --- output/openapi/elasticsearch-openapi.json | 2 +- .../elasticsearch-serverless-openapi.json | 2 +- output/schema/schema-serverless.json | 82 +++++++++++-------- output/schema/schema.json | 82 +++++++++++-------- specification/inference/_types/CommonTypes.ts | 2 +- 5 files changed, 103 insertions(+), 67 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 1b872a2579..d6dd72f3e8 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -80377,7 +80377,7 @@ "$ref": "#/components/schemas/inference._types.RateLimitSetting" }, "url": { - "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, endpoint must be compatible with the OpenAI API format and include `v1/chat/completions`.", + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms). OpenAI mode must be enabled, and the endpoint URL must include `v1/chat/completions`.", "type": "string" }, "model_id": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index fc56fc88e3..69521e6dc6 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -51527,7 +51527,7 @@ "$ref": "#/components/schemas/inference._types.RateLimitSetting" }, "url": { - "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, endpoint must be compatible with the OpenAI API format and include `v1/chat/completions`.", + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms). OpenAI mode must be enabled, and the endpoint URL must include `v1/chat/completions`.", "type": "string" }, "model_id": { diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 7830c362e5..f5b18a481b 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -4419,7 +4419,7 @@ "visibility": "public" } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "docId": "inference-api-chat-completion", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-unified-inference", "name": "inference.chat_completion_unified", @@ -5147,7 +5147,7 @@ "visibility": "public" } }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create a Hugging Face inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "docId": "inference-api-put-huggingface", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face", "name": "inference.put_hugging_face", @@ -27409,7 +27409,7 @@ } } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "examples": { "PostChatCompletionRequestExample1": { "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion on the example question with streaming.", @@ -27417,12 +27417,12 @@ "value": "{\n \"model\": \"gpt-4o\",\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": \"What is Elastic?\"\n }\n ]\n}" }, "PostChatCompletionRequestExample2": { - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "summary": "A chat completion task with tool_calls", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "summary": "A chat completion task with tools and tool_calls", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" } @@ -29364,7 +29364,7 @@ } ] }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create a Hugging Face inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "examples": { "PutHuggingFaceRequestExample1": { "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", @@ -29410,7 +29410,7 @@ } ], "query": [], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L85" + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L97" }, { "body": { @@ -105904,11 +105904,17 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L902-L904" + "specLocation": "inference/_types/CommonTypes.ts#L913-L915" }, { "kind": "enum", "members": [ + { + "name": "chat_completion" + }, + { + "name": "completion" + }, { "name": "text_embedding" } @@ -105917,7 +105923,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L898-L900" + "specLocation": "inference/_types/CommonTypes.ts#L907-L911" }, { "kind": "enum", @@ -105930,7 +105936,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L965-L967" + "specLocation": "inference/_types/CommonTypes.ts#L976-L978" }, { "kind": "enum", @@ -105949,7 +105955,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L969-L973" + "specLocation": "inference/_types/CommonTypes.ts#L980-L984" }, { "kind": "enum", @@ -105965,7 +105971,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L960-L963" + "specLocation": "inference/_types/CommonTypes.ts#L971-L974" }, { "kind": "enum", @@ -105987,7 +105993,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L975-L980" + "specLocation": "inference/_types/CommonTypes.ts#L986-L991" }, { "codegenNames": [ @@ -106069,7 +106075,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1015-L1017" + "specLocation": "inference/_types/CommonTypes.ts#L1026-L1028" }, { "kind": "enum", @@ -106082,7 +106088,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1011-L1013" + "specLocation": "inference/_types/CommonTypes.ts#L1022-L1024" }, { "kind": "enum", @@ -106095,7 +106101,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1077-L1079" + "specLocation": "inference/_types/CommonTypes.ts#L1088-L1090" }, { "kind": "enum", @@ -106114,7 +106120,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1071-L1075" + "specLocation": "inference/_types/CommonTypes.ts#L1082-L1086" }, { "kind": "type_alias", @@ -106217,7 +106223,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1145-L1147" + "specLocation": "inference/_types/CommonTypes.ts#L1156-L1158" }, { "kind": "enum", @@ -106233,7 +106239,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1140-L1143" + "specLocation": "inference/_types/CommonTypes.ts#L1151-L1154" }, { "kind": "enum", @@ -106246,7 +106252,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1192-L1194" + "specLocation": "inference/_types/CommonTypes.ts#L1203-L1205" }, { "kind": "enum", @@ -106259,7 +106265,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1188-L1190" + "specLocation": "inference/_types/CommonTypes.ts#L1199-L1201" }, { "kind": "enum", @@ -128455,7 +128461,7 @@ } }, { - "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.", + "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000 for all supported tasks.\nHugging Face does not publish a universal rate limit — actual limits may vary.\nIt is recommended to adjust this value based on the capacity and limits of your specific deployment environment.", "name": "rate_limit", "required": false, "type": { @@ -128467,7 +128473,7 @@ } }, { - "description": "The URL endpoint to use for the requests.", + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms). OpenAI mode must be enabled, and the endpoint URL must include `v1/chat/completions`.", "name": "url", "required": true, "type": { @@ -128477,9 +128483,21 @@ "namespace": "_builtins" } } + }, + { + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field is not required and will be ignored if provided.", + "name": "model_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } } ], - "specLocation": "inference/_types/CommonTypes.ts#L875-L896" + "specLocation": "inference/_types/CommonTypes.ts#L875-L905" }, { "kind": "interface", @@ -128541,7 +128559,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L906-L935" + "specLocation": "inference/_types/CommonTypes.ts#L917-L946" }, { "kind": "interface", @@ -128587,7 +128605,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L937-L958" + "specLocation": "inference/_types/CommonTypes.ts#L948-L969" }, { "inherits": { @@ -128689,7 +128707,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L982-L1009" + "specLocation": "inference/_types/CommonTypes.ts#L993-L1020" }, { "kind": "interface", @@ -128776,7 +128794,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1019-L1061" + "specLocation": "inference/_types/CommonTypes.ts#L1030-L1072" }, { "kind": "interface", @@ -128798,7 +128816,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1063-L1069" + "specLocation": "inference/_types/CommonTypes.ts#L1074-L1080" }, { "kind": "interface", @@ -128862,7 +128880,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1081-L1112" + "specLocation": "inference/_types/CommonTypes.ts#L1092-L1123" }, { "kind": "interface", @@ -128922,7 +128940,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1114-L1138" + "specLocation": "inference/_types/CommonTypes.ts#L1125-L1149" }, { "kind": "interface", @@ -129010,7 +129028,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1149-L1186" + "specLocation": "inference/_types/CommonTypes.ts#L1160-L1197" }, { "description": "Defines the response for a rerank request.", diff --git a/output/schema/schema.json b/output/schema/schema.json index a57ecaf633..a97ca1e50e 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9273,7 +9273,7 @@ "visibility": "public" } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "docId": "inference-api-chat-completion", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-unified-inference", "name": "inference.chat_completion_unified", @@ -10001,7 +10001,7 @@ "visibility": "public" } }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create a Hugging Face inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "docId": "inference-api-put-huggingface", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face", "name": "inference.put_hugging_face", @@ -154307,7 +154307,7 @@ } }, { - "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.", + "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000 for all supported tasks.\nHugging Face does not publish a universal rate limit — actual limits may vary.\nIt is recommended to adjust this value based on the capacity and limits of your specific deployment environment.", "name": "rate_limit", "required": false, "type": { @@ -154319,7 +154319,7 @@ } }, { - "description": "The URL endpoint to use for the requests.", + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms). OpenAI mode must be enabled, and the endpoint URL must include `v1/chat/completions`.", "name": "url", "required": true, "type": { @@ -154329,9 +154329,21 @@ "namespace": "_builtins" } } + }, + { + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field is not required and will be ignored if provided.", + "name": "model_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } } ], - "specLocation": "inference/_types/CommonTypes.ts#L875-L896" + "specLocation": "inference/_types/CommonTypes.ts#L875-L905" }, { "kind": "enum", @@ -154344,11 +154356,17 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L902-L904" + "specLocation": "inference/_types/CommonTypes.ts#L913-L915" }, { "kind": "enum", "members": [ + { + "name": "chat_completion" + }, + { + "name": "completion" + }, { "name": "text_embedding" } @@ -154357,7 +154375,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L898-L900" + "specLocation": "inference/_types/CommonTypes.ts#L907-L911" }, { "kind": "interface", @@ -154720,7 +154738,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L906-L935" + "specLocation": "inference/_types/CommonTypes.ts#L917-L946" }, { "kind": "enum", @@ -154733,7 +154751,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L965-L967" + "specLocation": "inference/_types/CommonTypes.ts#L976-L978" }, { "kind": "enum", @@ -154752,7 +154770,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L969-L973" + "specLocation": "inference/_types/CommonTypes.ts#L980-L984" }, { "kind": "interface", @@ -154798,7 +154816,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L937-L958" + "specLocation": "inference/_types/CommonTypes.ts#L948-L969" }, { "kind": "enum", @@ -154814,7 +154832,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L960-L963" + "specLocation": "inference/_types/CommonTypes.ts#L971-L974" }, { "kind": "enum", @@ -154836,7 +154854,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L975-L980" + "specLocation": "inference/_types/CommonTypes.ts#L986-L991" }, { "kind": "interface", @@ -154994,7 +155012,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L982-L1009" + "specLocation": "inference/_types/CommonTypes.ts#L993-L1020" }, { "kind": "enum", @@ -155007,7 +155025,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1015-L1017" + "specLocation": "inference/_types/CommonTypes.ts#L1026-L1028" }, { "kind": "enum", @@ -155020,7 +155038,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1011-L1013" + "specLocation": "inference/_types/CommonTypes.ts#L1022-L1024" }, { "kind": "interface", @@ -155107,7 +155125,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1019-L1061" + "specLocation": "inference/_types/CommonTypes.ts#L1030-L1072" }, { "kind": "enum", @@ -155120,7 +155138,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1077-L1079" + "specLocation": "inference/_types/CommonTypes.ts#L1088-L1090" }, { "kind": "interface", @@ -155142,7 +155160,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1063-L1069" + "specLocation": "inference/_types/CommonTypes.ts#L1074-L1080" }, { "kind": "enum", @@ -155161,7 +155179,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1071-L1075" + "specLocation": "inference/_types/CommonTypes.ts#L1082-L1086" }, { "kind": "interface", @@ -155749,7 +155767,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1081-L1112" + "specLocation": "inference/_types/CommonTypes.ts#L1092-L1123" }, { "kind": "enum", @@ -155762,7 +155780,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1145-L1147" + "specLocation": "inference/_types/CommonTypes.ts#L1156-L1158" }, { "kind": "interface", @@ -155822,7 +155840,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1114-L1138" + "specLocation": "inference/_types/CommonTypes.ts#L1125-L1149" }, { "kind": "enum", @@ -155838,7 +155856,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1140-L1143" + "specLocation": "inference/_types/CommonTypes.ts#L1151-L1154" }, { "kind": "interface", @@ -155926,7 +155944,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1149-L1186" + "specLocation": "inference/_types/CommonTypes.ts#L1160-L1197" }, { "kind": "enum", @@ -155939,7 +155957,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1192-L1194" + "specLocation": "inference/_types/CommonTypes.ts#L1203-L1205" }, { "kind": "enum", @@ -155952,7 +155970,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1188-L1190" + "specLocation": "inference/_types/CommonTypes.ts#L1199-L1201" }, { "kind": "request", @@ -155970,7 +155988,7 @@ } } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "examples": { "PostChatCompletionRequestExample1": { "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion on the example question with streaming.", @@ -155978,12 +155996,12 @@ "value": "{\n \"model\": \"gpt-4o\",\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": \"What is Elastic?\"\n }\n ]\n}" }, "PostChatCompletionRequestExample2": { - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "summary": "A chat completion task with tool_calls", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "summary": "A chat completion task with tools and tool_calls", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" } @@ -157925,7 +157943,7 @@ } ] }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create a Hugging Face inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "examples": { "PutHuggingFaceRequestExample1": { "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", @@ -157970,7 +157988,7 @@ } ], "query": [], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L85" + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L97" }, { "kind": "response", diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 32a5d6f966..0be5f85f74 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -893,7 +893,7 @@ export class HuggingFaceServiceSettings { rate_limit?: RateLimitSetting /** * The URL endpoint to use for the requests. - * For `completion` and `chat_completion` tasks, endpoint must be compatible with the OpenAI API format and include `v1/chat/completions`. + * For `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms). OpenAI mode must be enabled, and the endpoint URL must include `v1/chat/completions`. */ url: string /** From bbdad4aea5c903bdd6b30db730827faf9f74b6ef Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Tue, 3 Jun 2025 08:09:57 +0000 Subject: [PATCH 3/6] Fix model_id description for text_embedding task and update endpoint creation comment --- output/openapi/elasticsearch-openapi.json | 4 ++-- output/openapi/elasticsearch-serverless-openapi.json | 4 ++-- output/schema/schema-serverless.json | 6 +++--- output/schema/schema.json | 6 +++--- specification/inference/_types/CommonTypes.ts | 2 +- .../inference/put_hugging_face/PutHuggingFaceRequest.ts | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index d6dd72f3e8..21445c3e2f 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -18691,7 +18691,7 @@ "inference" ], "summary": "Create a Hugging Face inference endpoint", - "description": "Creates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", + "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "operationId": "inference-put-hugging-face", "parameters": [ { @@ -80381,7 +80381,7 @@ "type": "string" }, "model_id": { - "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field is not required and will be ignored if provided.", + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field should not be included or the request will result fail.", "type": "string" } }, diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 69521e6dc6..c2fc0fb384 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -10614,7 +10614,7 @@ "inference" ], "summary": "Create a Hugging Face inference endpoint", - "description": "Creates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", + "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "operationId": "inference-put-hugging-face", "parameters": [ { @@ -51531,7 +51531,7 @@ "type": "string" }, "model_id": { - "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field is not required and will be ignored if provided.", + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field should not be included or the request will result fail.", "type": "string" } }, diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index f5b18a481b..cc42b76db3 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -5147,7 +5147,7 @@ "visibility": "public" } }, - "description": "Create a Hugging Face inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "docId": "inference-api-put-huggingface", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face", "name": "inference.put_hugging_face", @@ -29364,7 +29364,7 @@ } ] }, - "description": "Create a Hugging Face inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "examples": { "PutHuggingFaceRequestExample1": { "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", @@ -128485,7 +128485,7 @@ } }, { - "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field is not required and will be ignored if provided.", + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field should not be included or the request will result fail.", "name": "model_id", "required": false, "type": { diff --git a/output/schema/schema.json b/output/schema/schema.json index a97ca1e50e..13be1611ce 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -10001,7 +10001,7 @@ "visibility": "public" } }, - "description": "Create a Hugging Face inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "docId": "inference-api-put-huggingface", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face", "name": "inference.put_hugging_face", @@ -154331,7 +154331,7 @@ } }, { - "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field is not required and will be ignored if provided.", + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field should not be included or the request will result fail.", "name": "model_id", "required": false, "type": { @@ -157943,7 +157943,7 @@ } ] }, - "description": "Create a Hugging Face inference endpoint.\n\nCreates an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "examples": { "PutHuggingFaceRequestExample1": { "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index 0be5f85f74..d5dfd8db2e 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -899,7 +899,7 @@ export class HuggingFaceServiceSettings { /** * The name of the HuggingFace model to use for the inference task. * For `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints. - * For the `text_embedding` task, this field is not required and will be ignored if provided. + * For the `text_embedding` task, this field should not be included or the request will result fail. */ model_id?: string } diff --git a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts index 593317cb90..79119847c2 100644 --- a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts +++ b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts @@ -29,7 +29,7 @@ import { Id } from '@_types/common' /** * Create a Hugging Face inference endpoint. * - * Creates an inference endpoint to perform an inference task with the `hugging_face` service. + * Create an inference endpoint to perform an inference task with the `hugging_face` service. * Supported tasks include: `text_embedding`, `completion`, and `chat_completion`. * * To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint. From 16da9b296bc5574d65f595f2ef2889b7f2bdd110 Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Wed, 4 Jun 2025 17:25:50 +0000 Subject: [PATCH 4/6] Enhance Hugging Face integration documentation for chat completion and completion tasks --- output/openapi/elasticsearch-openapi.json | 19 +++-- .../elasticsearch-serverless-openapi.json | 19 +++-- output/schema/schema.json | 84 ++++++++++++------- specification/_doc_ids/table.csv | 1 + specification/inference/_types/CommonTypes.ts | 4 +- 5 files changed, 84 insertions(+), 43 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 1f6c018674..bf72c3b67d 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17642,7 +17642,7 @@ "inference" ], "summary": "Perform chat completion inference\n", - "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "operationId": "inference-chat-completion-unified", "parameters": [ { @@ -17681,12 +17681,12 @@ }, "PostChatCompletionRequestExample2": { "summary": "A chat completion task with tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { "summary": "A chat completion task with tools and tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" } } @@ -18906,7 +18906,7 @@ "inference" ], "summary": "Create a Hugging Face inference endpoint", - "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "operationId": "inference-put-hugging-face", "parameters": [ { @@ -80737,6 +80737,8 @@ "inference._types.HuggingFaceTaskType": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding" ] }, @@ -80760,7 +80762,14 @@ "$ref": "#/components/schemas/inference._types.RateLimitSetting" }, "url": { - "description": "The URL endpoint to use for the requests.", + "externalDocs": { + "url": "https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms" + }, + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (see the linked external documentation for details). The endpoint URL for the request must include `/v1/chat/completions`.\nIf the model supports the OpenAI Chat Completion schema, a toggle should appear in the interface. Enabling this toggle doesn't change any model behavior, it reveals the full endpoint URL needed (which should include `/v1/chat/completions`) when configuring the inference endpoint in Elasticsearch. If the model doesn't support this schema, the toggle may not be shown.", + "type": "string" + }, + "model_id": { + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field should not be included or the request will result fail.", "type": "string" } }, diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 43b027cabd..43a54e6793 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9495,7 +9495,7 @@ "inference" ], "summary": "Perform chat completion inference\n", - "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "operationId": "inference-chat-completion-unified", "parameters": [ { @@ -9534,12 +9534,12 @@ }, "PostChatCompletionRequestExample2": { "summary": "A chat completion task with tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { "summary": "A chat completion task with tools and tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" } } @@ -10759,7 +10759,7 @@ "inference" ], "summary": "Create a Hugging Face inference endpoint", - "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "operationId": "inference-put-hugging-face", "parameters": [ { @@ -51817,6 +51817,8 @@ "inference._types.HuggingFaceTaskType": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding" ] }, @@ -51840,7 +51842,14 @@ "$ref": "#/components/schemas/inference._types.RateLimitSetting" }, "url": { - "description": "The URL endpoint to use for the requests.", + "externalDocs": { + "url": "https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms" + }, + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (see the linked external documentation for details). The endpoint URL for the request must include `/v1/chat/completions`.\nIf the model supports the OpenAI Chat Completion schema, a toggle should appear in the interface. Enabling this toggle doesn't change any model behavior, it reveals the full endpoint URL needed (which should include `/v1/chat/completions`) when configuring the inference endpoint in Elasticsearch. If the model doesn't support this schema, the toggle may not be shown.", + "type": "string" + }, + "model_id": { + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field should not be included or the request will result fail.", "type": "string" } }, diff --git a/output/schema/schema.json b/output/schema/schema.json index aa6370192f..98a76d637a 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9305,7 +9305,7 @@ "visibility": "public" } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "docId": "inference-api-chat-completion", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-unified-inference", "name": "inference.chat_completion_unified", @@ -10033,7 +10033,7 @@ "visibility": "public" } }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "docId": "inference-api-put-huggingface", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face", "name": "inference.put_hugging_face", @@ -154977,7 +154977,7 @@ } }, { - "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.", + "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000 for all supported tasks.\nHugging Face does not publish a universal rate limit — actual limits may vary.\nIt is recommended to adjust this value based on the capacity and limits of your specific deployment environment.", "name": "rate_limit", "required": false, "type": { @@ -154989,7 +154989,9 @@ } }, { - "description": "The URL endpoint to use for the requests.", + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (see the linked external documentation for details). The endpoint URL for the request must include `/v1/chat/completions`.\nIf the model supports the OpenAI Chat Completion schema, a toggle should appear in the interface. Enabling this toggle doesn't change any model behavior, it reveals the full endpoint URL needed (which should include `/v1/chat/completions`) when configuring the inference endpoint in Elasticsearch. If the model doesn't support this schema, the toggle may not be shown.", + "extDocId": "huggingface-chat-completion-interface", + "extDocUrl": "https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms", "name": "url", "required": true, "type": { @@ -154999,9 +155001,21 @@ "namespace": "_builtins" } } + }, + { + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field should not be included or the request will result fail.", + "name": "model_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } } ], - "specLocation": "inference/_types/CommonTypes.ts#L877-L898" + "specLocation": "inference/_types/CommonTypes.ts#L877-L909" }, { "kind": "enum", @@ -155014,11 +155028,17 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L904-L906" + "specLocation": "inference/_types/CommonTypes.ts#L917-L919" }, { "kind": "enum", "members": [ + { + "name": "chat_completion" + }, + { + "name": "completion" + }, { "name": "text_embedding" } @@ -155027,7 +155047,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L900-L902" + "specLocation": "inference/_types/CommonTypes.ts#L911-L915" }, { "kind": "interface", @@ -155430,7 +155450,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L908-L937" + "specLocation": "inference/_types/CommonTypes.ts#L921-L950" }, { "kind": "enum", @@ -155443,7 +155463,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L967-L969" + "specLocation": "inference/_types/CommonTypes.ts#L980-L982" }, { "kind": "enum", @@ -155462,7 +155482,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L971-L975" + "specLocation": "inference/_types/CommonTypes.ts#L984-L988" }, { "kind": "interface", @@ -155508,7 +155528,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L939-L960" + "specLocation": "inference/_types/CommonTypes.ts#L952-L973" }, { "kind": "enum", @@ -155524,7 +155544,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L962-L965" + "specLocation": "inference/_types/CommonTypes.ts#L975-L978" }, { "kind": "enum", @@ -155546,7 +155566,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L977-L982" + "specLocation": "inference/_types/CommonTypes.ts#L990-L995" }, { "kind": "interface", @@ -155704,7 +155724,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L984-L1011" + "specLocation": "inference/_types/CommonTypes.ts#L997-L1024" }, { "kind": "enum", @@ -155717,7 +155737,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1017-L1019" + "specLocation": "inference/_types/CommonTypes.ts#L1030-L1032" }, { "kind": "enum", @@ -155730,7 +155750,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1013-L1015" + "specLocation": "inference/_types/CommonTypes.ts#L1026-L1028" }, { "kind": "interface", @@ -155817,7 +155837,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1021-L1063" + "specLocation": "inference/_types/CommonTypes.ts#L1034-L1076" }, { "kind": "enum", @@ -155830,7 +155850,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1079-L1081" + "specLocation": "inference/_types/CommonTypes.ts#L1092-L1094" }, { "kind": "interface", @@ -155852,7 +155872,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1065-L1071" + "specLocation": "inference/_types/CommonTypes.ts#L1078-L1084" }, { "kind": "enum", @@ -155871,7 +155891,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1073-L1077" + "specLocation": "inference/_types/CommonTypes.ts#L1086-L1090" }, { "kind": "interface", @@ -156481,7 +156501,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1083-L1114" + "specLocation": "inference/_types/CommonTypes.ts#L1096-L1127" }, { "kind": "enum", @@ -156494,7 +156514,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1147-L1149" + "specLocation": "inference/_types/CommonTypes.ts#L1160-L1162" }, { "kind": "interface", @@ -156554,7 +156574,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1116-L1140" + "specLocation": "inference/_types/CommonTypes.ts#L1129-L1153" }, { "kind": "enum", @@ -156570,7 +156590,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1142-L1145" + "specLocation": "inference/_types/CommonTypes.ts#L1155-L1158" }, { "kind": "interface", @@ -156658,7 +156678,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1151-L1188" + "specLocation": "inference/_types/CommonTypes.ts#L1164-L1201" }, { "kind": "enum", @@ -156671,7 +156691,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1194-L1196" + "specLocation": "inference/_types/CommonTypes.ts#L1207-L1209" }, { "kind": "enum", @@ -156684,7 +156704,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1190-L1192" + "specLocation": "inference/_types/CommonTypes.ts#L1203-L1205" }, { "kind": "request", @@ -156702,7 +156722,7 @@ } } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation. \nIt only works with the `chat_completion` task type for `openai`, `elastic` and `googlevertexai` inference services.\n\nIMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face.\nFor built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "examples": { "PostChatCompletionRequestExample1": { "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion on the example question with streaming.", @@ -156710,12 +156730,12 @@ "value": "{\n \"model\": \"gpt-4o\",\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": \"What is Elastic?\"\n }\n ]\n}" }, "PostChatCompletionRequestExample2": { - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "summary": "A chat completion task with tool_calls", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "summary": "A chat completion task with tools and tool_calls", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" } @@ -158657,7 +158677,7 @@ } ] }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "examples": { "PutHuggingFaceRequestExample1": { "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", @@ -158702,7 +158722,7 @@ } ], "query": [], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L85" + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L97" }, { "kind": "response", diff --git a/specification/_doc_ids/table.csv b/specification/_doc_ids/table.csv index 8855539414..6c81a22fbe 100644 --- a/specification/_doc_ids/table.csv +++ b/specification/_doc_ids/table.csv @@ -260,6 +260,7 @@ grok,https://www.elastic.co/docs/explore-analyze/scripting/grok grok-processor,https://www.elastic.co/docs/reference/enrich-processor/grok-processor gsub-processor,https://www.elastic.co/docs/reference/enrich-processor/gsub-processor health-api,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-health-report +huggingface-chat-completion-interface,https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms huggingface-tokens,https://huggingface.co/settings/tokens ilm-delete-lifecycle,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-delete-lifecycle ilm-explain-lifecycle,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-ilm-explain-lifecycle diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index c83bb51473..01ee04eae0 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -895,7 +895,9 @@ export class HuggingFaceServiceSettings { rate_limit?: RateLimitSetting /** * The URL endpoint to use for the requests. - * For `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms). OpenAI mode must be enabled, and the endpoint URL must include `v1/chat/completions`. + * For `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (see the linked external documentation for details). The endpoint URL for the request must include `/v1/chat/completions`. + * If the model supports the OpenAI Chat Completion schema, a toggle should appear in the interface. Enabling this toggle doesn't change any model behavior, it reveals the full endpoint URL needed (which should include `/v1/chat/completions`) when configuring the inference endpoint in Elasticsearch. If the model doesn't support this schema, the toggle may not be shown. + * @ext_doc_id huggingface-chat-completion-interface */ url: string /** From 332c5d152d47f22d4dc88e9a10d8a9f7320e5cdb Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Wed, 4 Jun 2025 17:31:09 +0000 Subject: [PATCH 5/6] Add @typescript-eslint/rule-tester to devDependencies in package-lock.json --- specification/package-lock.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/specification/package-lock.json b/specification/package-lock.json index 70b74363b4..b523661ed7 100644 --- a/specification/package-lock.json +++ b/specification/package-lock.json @@ -21,6 +21,9 @@ "@typescript-eslint/utils": "^8.32.1", "typescript": "^5.8.3" }, + "devDependencies": { + "@typescript-eslint/rule-tester": "^8.32.1" + }, "peerDependencies": { "eslint": ">=9.0.0" } From a6dc68fa258c65a76e72dc655edf260ce0d2ef05 Mon Sep 17 00:00:00 2001 From: Jan Kazlouski Date: Thu, 5 Jun 2025 09:34:13 +0000 Subject: [PATCH 6/6] Enhance Hugging Face integration to support chat_completion and completion tasks --- output/openapi/elasticsearch-openapi.json | 23 ++-- .../elasticsearch-serverless-openapi.json | 23 ++-- output/schema/schema.json | 100 +++++++++++------- output/typescript/types.ts | 2 +- specification/inference/_types/TaskType.ts | 4 +- 5 files changed, 101 insertions(+), 51 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index d50997116d..0d37de5f4d 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -17642,7 +17642,7 @@ "inference" ], "summary": "Perform chat completion inference\n", - "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "operationId": "inference-chat-completion-unified", "parameters": [ { @@ -17681,12 +17681,12 @@ }, "PostChatCompletionRequestExample2": { "summary": "A chat completion task with tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { "summary": "A chat completion task with tools and tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" } } @@ -18906,7 +18906,7 @@ "inference" ], "summary": "Create a Hugging Face inference endpoint", - "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "operationId": "inference-put-hugging-face", "parameters": [ { @@ -81007,6 +81007,8 @@ "inference._types.HuggingFaceTaskType": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding" ] }, @@ -81030,7 +81032,14 @@ "$ref": "#/components/schemas/inference._types.RateLimitSetting" }, "url": { - "description": "The URL endpoint to use for the requests.", + "externalDocs": { + "url": "https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms" + }, + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (see the linked external documentation for details). The endpoint URL for the request must include `/v1/chat/completions`.\nIf the model supports the OpenAI Chat Completion schema, a toggle should appear in the interface. Enabling this toggle doesn't change any model behavior, it reveals the full endpoint URL needed (which should include `/v1/chat/completions`) when configuring the inference endpoint in Elasticsearch. If the model doesn't support this schema, the toggle may not be shown.", + "type": "string" + }, + "model_id": { + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field should not be included or the request will result fail.", "type": "string" } }, @@ -81065,7 +81074,9 @@ "inference._types.TaskTypeHuggingFace": { "type": "string", "enum": [ - "text_embedding" + "text_embedding", + "chat_completion", + "completion" ] }, "inference._types.JinaAITaskType": { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index b6d1cdce2e..d5c1da1065 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -9495,7 +9495,7 @@ "inference" ], "summary": "Perform chat completion inference\n", - "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "The chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "operationId": "inference-chat-completion-unified", "parameters": [ { @@ -9534,12 +9534,12 @@ }, "PostChatCompletionRequestExample2": { "summary": "A chat completion task with tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { "summary": "A chat completion task with tools and tool_calls", - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" } } @@ -10759,7 +10759,7 @@ "inference" ], "summary": "Create a Hugging Face inference endpoint", - "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "operationId": "inference-put-hugging-face", "parameters": [ { @@ -52087,6 +52087,8 @@ "inference._types.HuggingFaceTaskType": { "type": "string", "enum": [ + "chat_completion", + "completion", "text_embedding" ] }, @@ -52110,7 +52112,14 @@ "$ref": "#/components/schemas/inference._types.RateLimitSetting" }, "url": { - "description": "The URL endpoint to use for the requests.", + "externalDocs": { + "url": "https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms" + }, + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (see the linked external documentation for details). The endpoint URL for the request must include `/v1/chat/completions`.\nIf the model supports the OpenAI Chat Completion schema, a toggle should appear in the interface. Enabling this toggle doesn't change any model behavior, it reveals the full endpoint URL needed (which should include `/v1/chat/completions`) when configuring the inference endpoint in Elasticsearch. If the model doesn't support this schema, the toggle may not be shown.", + "type": "string" + }, + "model_id": { + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field should not be included or the request will result fail.", "type": "string" } }, @@ -52145,7 +52154,9 @@ "inference._types.TaskTypeHuggingFace": { "type": "string", "enum": [ - "text_embedding" + "text_embedding", + "chat_completion", + "completion" ] }, "inference._types.JinaAITaskType": { diff --git a/output/schema/schema.json b/output/schema/schema.json index 52f078c96c..ab3a783da4 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -9305,7 +9305,7 @@ "visibility": "public" } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "docId": "inference-api-chat-completion", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-unified-inference", "name": "inference.chat_completion_unified", @@ -10033,7 +10033,7 @@ "visibility": "public" } }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "docId": "inference-api-put-huggingface", "docUrl": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-hugging-face", "name": "inference.put_hugging_face", @@ -154977,7 +154977,7 @@ } }, { - "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000.", + "description": "This setting helps to minimize the number of rate limit errors returned from Hugging Face.\nBy default, the `hugging_face` service sets the number of requests allowed per minute to 3000 for all supported tasks.\nHugging Face does not publish a universal rate limit — actual limits may vary.\nIt is recommended to adjust this value based on the capacity and limits of your specific deployment environment.", "name": "rate_limit", "required": false, "type": { @@ -154989,7 +154989,9 @@ } }, { - "description": "The URL endpoint to use for the requests.", + "description": "The URL endpoint to use for the requests.\nFor `completion` and `chat_completion` tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (see the linked external documentation for details). The endpoint URL for the request must include `/v1/chat/completions`.\nIf the model supports the OpenAI Chat Completion schema, a toggle should appear in the interface. Enabling this toggle doesn't change any model behavior, it reveals the full endpoint URL needed (which should include `/v1/chat/completions`) when configuring the inference endpoint in Elasticsearch. If the model doesn't support this schema, the toggle may not be shown.", + "extDocId": "huggingface-chat-completion-interface", + "extDocUrl": "https://huggingface.co/docs/inference-providers/en/tasks/chat-completion#conversational-large-language-models-llms", "name": "url", "required": true, "type": { @@ -154999,9 +155001,21 @@ "namespace": "_builtins" } } + }, + { + "description": "The name of the HuggingFace model to use for the inference task.\nFor `completion` and `chat_completion` tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints.\nFor the `text_embedding` task, this field should not be included or the request will result fail.", + "name": "model_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } } ], - "specLocation": "inference/_types/CommonTypes.ts#L877-L898" + "specLocation": "inference/_types/CommonTypes.ts#L877-L909" }, { "kind": "enum", @@ -155014,11 +155028,17 @@ "name": "HuggingFaceServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L904-L906" + "specLocation": "inference/_types/CommonTypes.ts#L917-L919" }, { "kind": "enum", "members": [ + { + "name": "chat_completion" + }, + { + "name": "completion" + }, { "name": "text_embedding" } @@ -155027,7 +155047,7 @@ "name": "HuggingFaceTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L900-L902" + "specLocation": "inference/_types/CommonTypes.ts#L911-L915" }, { "kind": "interface", @@ -155990,7 +156010,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L908-L937" + "specLocation": "inference/_types/CommonTypes.ts#L921-L950" }, { "kind": "enum", @@ -156003,7 +156023,7 @@ "name": "JinaAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L967-L969" + "specLocation": "inference/_types/CommonTypes.ts#L980-L982" }, { "kind": "enum", @@ -156022,7 +156042,7 @@ "name": "JinaAISimilarityType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L971-L975" + "specLocation": "inference/_types/CommonTypes.ts#L984-L988" }, { "kind": "interface", @@ -156068,7 +156088,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L939-L960" + "specLocation": "inference/_types/CommonTypes.ts#L952-L973" }, { "kind": "enum", @@ -156084,7 +156104,7 @@ "name": "JinaAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L962-L965" + "specLocation": "inference/_types/CommonTypes.ts#L975-L978" }, { "kind": "enum", @@ -156106,7 +156126,7 @@ "name": "JinaAITextEmbeddingTask", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L977-L982" + "specLocation": "inference/_types/CommonTypes.ts#L990-L995" }, { "kind": "interface", @@ -156264,7 +156284,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L984-L1011" + "specLocation": "inference/_types/CommonTypes.ts#L997-L1024" }, { "kind": "enum", @@ -156277,7 +156297,7 @@ "name": "MistralServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1017-L1019" + "specLocation": "inference/_types/CommonTypes.ts#L1030-L1032" }, { "kind": "enum", @@ -156290,7 +156310,7 @@ "name": "MistralTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1013-L1015" + "specLocation": "inference/_types/CommonTypes.ts#L1026-L1028" }, { "kind": "interface", @@ -156377,7 +156397,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1021-L1063" + "specLocation": "inference/_types/CommonTypes.ts#L1034-L1076" }, { "kind": "enum", @@ -156390,7 +156410,7 @@ "name": "OpenAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1079-L1081" + "specLocation": "inference/_types/CommonTypes.ts#L1092-L1094" }, { "kind": "interface", @@ -156412,7 +156432,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1065-L1071" + "specLocation": "inference/_types/CommonTypes.ts#L1078-L1084" }, { "kind": "enum", @@ -156431,7 +156451,7 @@ "name": "OpenAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1073-L1077" + "specLocation": "inference/_types/CommonTypes.ts#L1086-L1090" }, { "kind": "interface", @@ -156930,13 +156950,19 @@ "members": [ { "name": "text_embedding" + }, + { + "name": "chat_completion" + }, + { + "name": "completion" } ], "name": { "name": "TaskTypeHuggingFace", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L88-L90" + "specLocation": "inference/_types/TaskType.ts#L88-L92" }, { "kind": "enum", @@ -156965,7 +156991,7 @@ "name": "TaskTypeMistral", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L92-L94" + "specLocation": "inference/_types/TaskType.ts#L94-L96" }, { "kind": "enum", @@ -156984,7 +157010,7 @@ "name": "TaskTypeOpenAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L96-L100" + "specLocation": "inference/_types/TaskType.ts#L98-L102" }, { "kind": "enum", @@ -157000,7 +157026,7 @@ "name": "TaskTypeVoyageAI", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L102-L105" + "specLocation": "inference/_types/TaskType.ts#L104-L107" }, { "kind": "enum", @@ -157013,7 +157039,7 @@ "name": "TaskTypeWatsonx", "namespace": "inference._types" }, - "specLocation": "inference/_types/TaskType.ts#L107-L109" + "specLocation": "inference/_types/TaskType.ts#L109-L111" }, { "kind": "interface", @@ -157259,7 +157285,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1083-L1114" + "specLocation": "inference/_types/CommonTypes.ts#L1096-L1127" }, { "kind": "enum", @@ -157272,7 +157298,7 @@ "name": "VoyageAIServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1147-L1149" + "specLocation": "inference/_types/CommonTypes.ts#L1160-L1162" }, { "kind": "interface", @@ -157332,7 +157358,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1116-L1140" + "specLocation": "inference/_types/CommonTypes.ts#L1129-L1153" }, { "kind": "enum", @@ -157348,7 +157374,7 @@ "name": "VoyageAITaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1142-L1145" + "specLocation": "inference/_types/CommonTypes.ts#L1155-L1158" }, { "kind": "interface", @@ -157436,7 +157462,7 @@ } } ], - "specLocation": "inference/_types/CommonTypes.ts#L1151-L1188" + "specLocation": "inference/_types/CommonTypes.ts#L1164-L1201" }, { "kind": "enum", @@ -157449,7 +157475,7 @@ "name": "WatsonxServiceType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1194-L1196" + "specLocation": "inference/_types/CommonTypes.ts#L1207-L1209" }, { "kind": "enum", @@ -157462,7 +157488,7 @@ "name": "WatsonxTaskType", "namespace": "inference._types" }, - "specLocation": "inference/_types/CommonTypes.ts#L1190-L1192" + "specLocation": "inference/_types/CommonTypes.ts#L1203-L1205" }, { "kind": "request", @@ -157480,7 +157506,7 @@ } } }, - "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai` service or the `elastic` service, use the Chat completion inference API.", + "description": "Perform chat completion inference\n\nThe chat completion inference API enables real-time responses for chat completion tasks by delivering answers incrementally, reducing response times during computation.\nIt only works with the `chat_completion` task type for `openai` and `elastic` inference services.\n\nNOTE: The `chat_completion` task type is only available within the _stream API and only supports streaming.\nThe Chat completion inference API and the Stream inference API differ in their response structure and capabilities.\nThe Chat completion inference API provides more comprehensive customization options through more fields and function calling support.\nIf you use the `openai`, `hugging_face` or the `elastic` service, use the Chat completion inference API.", "examples": { "PostChatCompletionRequestExample1": { "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion on the example question with streaming.", @@ -157488,12 +157514,12 @@ "value": "{\n \"model\": \"gpt-4o\",\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": \"What is Elastic?\"\n }\n ]\n}" }, "PostChatCompletionRequestExample2": { - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using an Assistant message with `tool_calls`.", "summary": "A chat completion task with tool_calls", "value": "{\n \"messages\": [\n {\n \"role\": \"assistant\",\n \"content\": \"Let's find out what the weather is\",\n \"tool_calls\": [ \n {\n \"id\": \"call_KcAjWtAww20AihPHphUh46Gd\",\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_weather\",\n \"arguments\": \"{\\\"location\\\":\\\"Boston, MA\\\"}\"\n }\n }\n ]\n },\n { \n \"role\": \"tool\",\n \"content\": \"The weather is cold\",\n \"tool_call_id\": \"call_KcAjWtAww20AihPHphUh46Gd\"\n }\n ]\n}" }, "PostChatCompletionRequestExample3": { - "description": "Run `POST POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", + "description": "Run `POST _inference/chat_completion/openai-completion/_stream` to perform a chat completion using a User message with `tools` and `tool_choice`.", "summary": "A chat completion task with tools and tool_calls", "value": "{\n \"messages\": [\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"What's the price of a scarf?\"\n }\n ]\n }\n ],\n \"tools\": [\n {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\",\n \"description\": \"Get the current price of a item\",\n \"parameters\": {\n \"type\": \"object\",\n \"properties\": {\n \"item\": {\n \"id\": \"123\"\n }\n }\n }\n }\n }\n ],\n \"tool_choice\": {\n \"type\": \"function\",\n \"function\": {\n \"name\": \"get_current_price\"\n }\n }\n}" } @@ -159435,7 +159461,7 @@ } ] }, - "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\n\nYou must first create an inference endpoint on the Hugging Face endpoint page to get an endpoint URL.\nSelect the model you want to use on the new endpoint creation page (for example `intfloat/e5-small-v2`), then select the sentence embeddings task under the advanced configuration section.\nCreate the endpoint and copy the URL after the endpoint initialization has been finished.\n\nThe following models are recommended for the Hugging Face service:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`", + "description": "Create a Hugging Face inference endpoint.\n\nCreate an inference endpoint to perform an inference task with the `hugging_face` service.\nSupported tasks include: `text_embedding`, `completion`, and `chat_completion`.\n\nTo configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint.\nSelect a model that supports the task you intend to use.\n\nFor Elastic's `text_embedding` task:\nThe selected model must support the `Sentence Embeddings` task. On the new endpoint creation page, select the `Sentence Embeddings` task under the `Advanced Configuration` section.\nAfter the endpoint has initialized, copy the generated endpoint URL.\nRecommended models for `text_embedding` task:\n\n* `all-MiniLM-L6-v2`\n* `all-MiniLM-L12-v2`\n* `all-mpnet-base-v2`\n* `e5-base-v2`\n* `e5-small-v2`\n* `multilingual-e5-base`\n* `multilingual-e5-small`\n\nFor Elastic's `chat_completion` and `completion` tasks:\nThe selected model must support the `Text Generation` task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for `Text Generation`. When creating dedicated endpoint select the `Text Generation` task.\nAfter the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes `/v1/chat/completions` part in URL. Then, copy the full endpoint URL for use.\nRecommended models for `chat_completion` and `completion` tasks:\n\n* `Mistral-7B-Instruct-v0.2`\n* `QwQ-32B`\n* `Phi-3-mini-128k-instruct`", "examples": { "PutHuggingFaceRequestExample1": { "description": "Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.", @@ -159480,7 +159506,7 @@ } ], "query": [], - "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L85" + "specLocation": "inference/put_hugging_face/PutHuggingFaceRequest.ts#L29-L97" }, { "kind": "response", diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 437fc0b26f..4315cccab5 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -13798,7 +13798,7 @@ export type InferenceTaskTypeGoogleAIStudio = 'text_embedding' | 'completion' export type InferenceTaskTypeGoogleVertexAI = 'text_embedding' | 'rerank' -export type InferenceTaskTypeHuggingFace = 'text_embedding' +export type InferenceTaskTypeHuggingFace = 'text_embedding' | 'chat_completion' | 'completion' export type InferenceTaskTypeJinaAi = 'text_embedding' | 'rerank' diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts index 7c36323488..9eda210b47 100644 --- a/specification/inference/_types/TaskType.ts +++ b/specification/inference/_types/TaskType.ts @@ -86,7 +86,9 @@ export enum TaskTypeGoogleVertexAI { } export enum TaskTypeHuggingFace { - text_embedding + text_embedding, + chat_completion, + completion } export enum TaskTypeMistral {