diff --git a/package.json b/package.json index 65aafaa259..3284efbed5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "dependencies": { - "@redocly/cli": "^1.34.1", + "@redocly/cli": "^1.34.3", "@stoplight/spectral-cli": "^6.14.2" } } diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index d99ff4b33c..ac5ff2bf2b 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -892,6 +892,7 @@ export class HuggingFaceServiceSettings { } export enum HuggingFaceTaskType { + rerank, text_embedding } diff --git a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts index 8229d3c32e..0f00ec1119 100644 --- a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts +++ b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts @@ -44,6 +44,17 @@ import { Id } from '@_types/common' * * `e5-small-v2` * * `multilingual-e5-base` * * `multilingual-e5-small` + * + * For Elastic's `rerank` task: + * The selected model must support the `sentence-ranking` task and expose the OpenAI API. + * Hugging Face supports only dedicated (not serverless) endpoints for `rerank` so far. + * After the endpoint is initialized, copy the full endpoint URL for use. 
+ * + * Tested models for `rerank` task: + * + * * `bge-reranker-base` + * * `jina-reranker-v1-turbo-en-GGUF` + * * @rest_spec_name inference.put_hugging_face * @availability stack since=8.12.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public diff --git a/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample2.yaml b/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample2.yaml new file mode 100644 index 0000000000..bc7d80d377 --- /dev/null +++ b/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample2.yaml @@ -0,0 +1,16 @@ +summary: A rerank task +description: Run `PUT _inference/rerank/hugging-face-rerank` to create an inference endpoint that performs a `rerank` task type. +# method_request: "PUT _inference/rerank/hugging-face-rerank" +# type: "request" +value: |- + { + "service": "hugging_face", + "service_settings": { + "api_key": "hugging-face-access-token", + "url": "url-endpoint" + }, + "task_settings": { + "return_documents": true, + "top_n": 3 + } + } diff --git a/specification/inference/rerank/examples/request/RerankRequestExample2.yaml b/specification/inference/rerank/examples/request/RerankRequestExample2.yaml new file mode 100644 index 0000000000..4489ae9045 --- /dev/null +++ b/specification/inference/rerank/examples/request/RerankRequestExample2.yaml @@ -0,0 +1,11 @@ +summary: Rerank task +description: Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face +# method_request: "POST _inference/rerank/bge-reranker-base-mkn" +# type: "request" +value: |- + { + "input": ["luke", "like", "leia", "chewy","r2d2", "star", "wars"], + "query": "star wars main character", + "return_documents": false, + "top_n": 2 + } diff --git a/specification/inference/rerank/examples/request/RerankRequestExample3.yaml 
b/specification/inference/rerank/examples/request/RerankRequestExample3.yaml new file mode 100644 index 0000000000..f7cca4324d --- /dev/null +++ b/specification/inference/rerank/examples/request/RerankRequestExample3.yaml @@ -0,0 +1,11 @@ +summary: Rerank task +description: Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face +# method_request: "POST _inference/rerank/bge-reranker-base-mkn" +# type: "request" +value: |- + { + "input": ["luke", "like", "leia", "chewy","r2d2", "star", "wars"], + "query": "star wars main character", + "return_documents": true, + "top_n": 3 + } diff --git a/specification/inference/rerank/examples/response/RerankResponseExample2.yaml b/specification/inference/rerank/examples/response/RerankResponseExample2.yaml new file mode 100644 index 0000000000..98af71f40a --- /dev/null +++ b/specification/inference/rerank/examples/response/RerankResponseExample2.yaml @@ -0,0 +1,18 @@ +summary: Rerank task +description: > + A successful response from `POST _inference/rerank/bge-reranker-base-mkn`. +# type: "response" +# response_code: +value: |- + { + "rerank": [ + { + "index": 6, + "relevance_score": 0.50955844 + }, + { + "index": 5, + "relevance_score": 0.084341794 + } + ] + } diff --git a/specification/inference/rerank/examples/response/RerankResponseExample3.yaml b/specification/inference/rerank/examples/response/RerankResponseExample3.yaml new file mode 100644 index 0000000000..a8790df956 --- /dev/null +++ b/specification/inference/rerank/examples/response/RerankResponseExample3.yaml @@ -0,0 +1,25 @@ +summary: Rerank task +description: > + A successful response from `POST _inference/rerank/bge-reranker-base-mkn`. 
+# type: "response" +# response_code: +value: |- + { + "rerank": [ + { + "index": 6, + "relevance_score": 0.50955844, + "text": "wars" + }, + { + "index": 5, + "relevance_score": 0.084341794, + "text": "star" + }, + { + "index": 3, + "relevance_score": 0.004520818, + "text": "chewy" + } + ] + }