Skip to content

Commit 4dcad34

Browse files
authored
Adds custom inference service API docs (#4852) (#4997)
* Adds custom inference service docs. * Adds response documentation. * Adds request params docs. * Fixes code style. * Fixes data type. * Adds json_spec. * Fixes typo. * Adds doc_id to the table.csv file. * Makes it prettier. * Adds examples. * Format fix. * Addresses feedback. * Adds more parameters and explanations. * Completes json_parser. * Addresses feedback. * Format fix. * Addresses more feedback.
1 parent dea47ae commit 4dcad34

14 files changed

+586
-12
lines changed

specification/_doc_ids/table.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,7 @@ inference-api-put-anthropic,https://www.elastic.co/guide/en/elasticsearch/refere
349349
inference-api-put-azureopenai,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-azure-openai.html,,
350350
inference-api-put-azureaistudio,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-azure-ai-studio.html,,
351351
inference-api-put-cohere,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-cohere.html,,
352+
inference-api-put-custom,https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom,,
352353
inference-api-put-deepseek,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-deepseek.html,,
353354
inference-api-put-elasticsearch,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-elasticsearch.html,,
354355
inference-api-put-eis,https://www.elastic.co/guide/en/elasticsearch/reference/{branch}/infer-service-elastic.html,,
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"inference.put_custom": {
3+
"documentation": {
4+
"url": "https://www.elastic.co/docs/api/doc/elasticsearch/operation/operation-inference-put-custom",
5+
"description": "Configure a custom inference endpoint"
6+
},
7+
"stability": "stable",
8+
"visibility": "public",
9+
"headers": {
10+
"accept": ["application/json"],
11+
"content_type": ["application/json"]
12+
},
13+
"url": {
14+
"paths": [
15+
{
16+
"path": "/_inference/{task_type}/{custom_inference_id}",
17+
"methods": ["PUT"],
18+
"parts": {
19+
"task_type": {
20+
"type": "string",
21+
"description": "The task type"
22+
},
23+
"custom_inference_id": {
24+
"type": "string",
25+
"description": "The inference Id"
26+
}
27+
}
28+
}
29+
]
30+
},
31+
"body": {
32+
"description": "The inference endpoint's task and service settings"
33+
}
34+
}
35+
}

specification/inference/_types/CommonTypes.ts

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,256 @@ export class CohereTaskSettings {
758758
truncate?: CohereTruncateType
759759
}
760760

761+
export class CustomServiceSettings {
762+
/**
763+
* Specifies the HTTP header parameters – such as `Authorization` or `Content-Type` – that are required to access the custom service.
764+
* For example:
765+
* ```
766+
* "headers":{
767+
* "Authorization": "Bearer ${api_key}",
768+
* "Content-Type": "application/json;charset=utf-8"
769+
* }
770+
* ```
771+
*/
772+
headers?: UserDefinedValue
773+
/**
774+
* Specifies the input type translation values that are used to replace the `${input_type}` template in the request body.
775+
* For example:
776+
* ```
777+
* "input_type": {
778+
* "translation": {
779+
* "ingest": "do_ingest",
780+
* "search": "do_search"
781+
* },
782+
* "default": "a_default"
783+
* },
784+
* ```
785+
* If the subsequent inference requests come from a search context, the `search` key will be used and the template will be replaced with `do_search`.
786+
* If it comes from the ingest context, `do_ingest` is used. If it's a different context that is not specified, the default value will be used. If no default is specified, an empty string is used.
787+
* `translation` can be:
788+
* * `classification`
789+
* * `clustering`
790+
* * `ingest`
791+
* * `search`
792+
*/
793+
input_type?: UserDefinedValue
794+
/**
795+
* Specifies the query parameters as a list of tuples. The arrays inside the `query_parameters` must have two items, a key and a value.
796+
* For example:
797+
* ```
798+
* "query_parameters":[
799+
* ["param_key", "some_value"],
800+
* ["param_key", "another_value"],
801+
* ["other_key", "other_value"]
802+
* ]
803+
* ```
804+
* If the base URL is `https://www.elastic.co`, it results in: `https://www.elastic.co?param_key=some_value&param_key=another_value&other_key=other_value`.
805+
*/
806+
query_parameters?: UserDefinedValue
807+
/**
808+
* The request configuration object.
809+
*/
810+
request: CustomRequestParams
811+
/**
812+
* The response configuration object.
813+
*/
814+
response: CustomResponseParams
815+
/**
816+
* Specifies secret parameters, like `api_key` or `api_token`, that are required to access the custom service.
817+
* For example:
818+
* ```
819+
* "secret_parameters":{
820+
* "api_key":"<api_key>"
821+
* }
822+
* ```
823+
*/
824+
secret_parameters: UserDefinedValue
825+
/**
826+
* The URL endpoint to use for the requests.
827+
*/
828+
url?: string
829+
}
830+
831+
export class CustomRequestParams {
832+
/**
833+
* The body structure of the request. It requires passing in the string-escaped result of the JSON format HTTP request body.
834+
* For example:
835+
* ```
836+
* "request": { "content": "{\"input\":${input}}" }
837+
* ```
838+
* > info
839+
* > The content string needs to be a single line except when using the Kibana console.
840+
*/
841+
content: string
842+
}
843+
844+
export class CustomResponseParams {
845+
/**
846+
* Specifies the JSON parser that is used to parse the response from the custom service.
847+
* Different task types require different json_parser parameters.
848+
* For example:
849+
* ```
850+
* # text_embedding
851+
* # For a response like this:
852+
*
853+
* {
854+
* "object": "list",
855+
* "data": [
856+
* {
857+
* "object": "embedding",
858+
* "index": 0,
859+
* "embedding": [
860+
* 0.014539449,
861+
* -0.015288644
862+
* ]
863+
* }
864+
* ],
865+
* "model": "text-embedding-ada-002-v2",
866+
* "usage": {
867+
* "prompt_tokens": 8,
868+
* "total_tokens": 8
869+
* }
870+
* }
871+
*
872+
* # the json_parser definition should look like this:
873+
*
874+
* "response":{
875+
* "json_parser":{
876+
* "text_embeddings":"$.data[*].embedding[*]"
877+
* }
878+
* }
879+
*
880+
* # sparse_embedding
881+
* # For a response like this:
882+
*
883+
* {
884+
* "request_id": "75C50B5B-E79E-4930-****-F48DBB392231",
885+
* "latency": 22,
886+
* "usage": {
887+
* "token_count": 11
888+
* },
889+
* "result": {
890+
* "sparse_embeddings": [
891+
* {
892+
* "index": 0,
893+
* "embedding": [
894+
* {
895+
* "token_id": 6,
896+
* "weight": 0.101
897+
* },
898+
* {
899+
* "token_id": 163040,
900+
* "weight": 0.28417
901+
* }
902+
* ]
903+
* }
904+
* ]
905+
* }
906+
* }
907+
*
908+
* # the json_parser definition should look like this:
909+
*
910+
* "response":{
911+
* "json_parser":{
912+
* "token_path":"$.result.sparse_embeddings[*].embedding[*].token_id",
913+
* "weight_path":"$.result.sparse_embeddings[*].embedding[*].weight"
914+
* }
915+
* }
916+
*
917+
* # rerank
918+
* # For a response like this:
919+
*
920+
* {
921+
* "results": [
922+
* {
923+
* "index": 3,
924+
* "relevance_score": 0.999071,
925+
* "document": "abc"
926+
* },
927+
* {
928+
* "index": 4,
929+
* "relevance_score": 0.7867867,
930+
* "document": "123"
931+
* },
932+
* {
933+
* "index": 0,
934+
* "relevance_score": 0.32713068,
935+
* "document": "super"
936+
* }
937+
* ]
938+
* }
939+
*
940+
* # the json_parser definition should look like this:
941+
*
942+
* "response":{
943+
* "json_parser":{
944+
* "reranked_index":"$.results[*].index", // optional
945+
* "relevance_score":"$.results[*].relevance_score",
946+
* "document_text":"$.results[*].document" // optional
947+
* }
948+
* }
949+
*
950+
* # completion
951+
* # For a response like this:
952+
*
953+
* {
954+
* "id": "chatcmpl-B9MBs8CjcvOU2jLn4n570S5qMJKcT",
955+
* "object": "chat.completion",
956+
* "created": 1741569952,
957+
* "model": "gpt-4.1-2025-04-14",
958+
* "choices": [
959+
* {
960+
* "index": 0,
961+
* "message": {
962+
* "role": "assistant",
963+
* "content": "Hello! How can I assist you today?",
964+
* "refusal": null,
965+
* "annotations": []
966+
* },
967+
* "logprobs": null,
968+
* "finish_reason": "stop"
969+
* }
970+
* ]
971+
* }
972+
*
973+
* # the json_parser definition should look like this:
974+
*
975+
* "response":{
976+
* "json_parser":{
977+
* "completion_result":"$.choices[*].message.content"
978+
* }
979+
* }
980+
*/
981+
json_parser: UserDefinedValue
982+
}
983+
984+
export enum CustomTaskType {
985+
text_embedding,
986+
sparse_embedding,
987+
rerank,
988+
completion
989+
}
990+
991+
export enum CustomServiceType {
992+
custom
993+
}
994+
995+
export class CustomTaskSettings {
996+
/**
997+
* Specifies parameters that are required to run the custom service. The parameters depend on the model your custom service uses.
998+
* For example:
999+
* ```
1000+
* "task_settings":{
1001+
* "parameters":{
1002+
* "input_type":"query",
1003+
* "return_token":true
1004+
* }
1005+
* }
1006+
* ```
1007+
*/
1008+
parameters?: UserDefinedValue
1009+
}
1010+
7611011
export class EisServiceSettings {
7621012
/**
7631013
* The name of the model to use for the inference task.

specification/inference/_types/Services.ts

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import {
2727
TaskTypeAzureAIStudio,
2828
TaskTypeAzureOpenAI,
2929
TaskTypeCohere,
30+
TaskTypeCustom,
3031
TaskTypeDeepSeek,
3132
TaskTypeElasticsearch,
3233
TaskTypeELSER,
@@ -75,18 +76,6 @@ export class InferenceEndpointInfo extends InferenceEndpoint {
7576
*/
7677
task_type: TaskType
7778
}
78-
79-
export class InferenceEndpointInfoJinaAi extends InferenceEndpoint {
80-
/**
81-
* The inference Id
82-
*/
83-
inference_id: string
84-
/**
85-
* The task type
86-
*/
87-
task_type: TaskTypeJinaAi
88-
}
89-
9079
export class InferenceEndpointInfoAlibabaCloudAI extends InferenceEndpoint {
9180
/**
9281
* The inference Id
@@ -153,6 +142,16 @@ export class InferenceEndpointInfoCohere extends InferenceEndpoint {
153142
task_type: TaskTypeCohere
154143
}
155144

145+
export class InferenceEndpointInfoCustom extends InferenceEndpoint {
146+
/**
147+
* The inference Id
148+
*/
149+
inference_id: string
150+
/**
151+
* The task type
152+
*/
153+
task_type: TaskTypeCustom
154+
}
156155
export class InferenceEndpointInfoDeepSeek extends InferenceEndpoint {
157156
/**
158157
* The inference Id
@@ -219,6 +218,17 @@ export class InferenceEndpointInfoHuggingFace extends InferenceEndpoint {
219218
task_type: TaskTypeHuggingFace
220219
}
221220

221+
export class InferenceEndpointInfoJinaAi extends InferenceEndpoint {
222+
/**
223+
* The inference Id
224+
*/
225+
inference_id: string
226+
/**
227+
* The task type
228+
*/
229+
task_type: TaskTypeJinaAi
230+
}
231+
222232
export class InferenceEndpointInfoMistral extends InferenceEndpoint {
223233
/**
224234
* The inference Id

specification/inference/_types/TaskType.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,13 @@ export enum TaskTypeCohere {
6565
completion
6666
}
6767

68+
export enum TaskTypeCustom {
69+
text_embedding,
70+
sparse_embedding,
71+
rerank,
72+
completion
73+
}
74+
6875
export enum TaskTypeDeepSeek {
6976
completion,
7077
chat_completion

0 commit comments

Comments
 (0)