Skip to content

Commit 393c271

Browse files
author
sunhailin
committed
Add new API SetAPIRequestTimeout
1 parent 4dc88a0 commit 393c271

File tree

5 files changed

+139
-184
lines changed

5 files changed

+139
-184
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,10 @@ func main() {
146146

147147
### Version
148148

149+
* version 2.0.5 - 2024/07/26
150+
* Remove the per-call `requestTimeout` parameter from the `TritonService` interface methods; use `SetAPIRequestTimeout` instead.
151+
* Add a new API, `SetAPIRequestTimeout`, to configure the request timeout once on the service.
152+
149153
* version 2.0.4 - 2024/07/09
150154
* Update `W2NER` input feature problem.(Missing `MaxSeqLength` config)
151155
* Code style fix. Reducing nil cases

models/base.go

Lines changed: 24 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,14 @@ func (m *ModelService) SetSecondaryServerURL(url string) *ModelService {
113113
return m
114114
}
115115

116+
// SetAPIRequestTimeout set api request timeout
117+
func (m *ModelService) SetAPIRequestTimeout(timeout time.Duration) *ModelService {
118+
if m.TritonService != nil {
119+
m.TritonService.SetAPIRequestTimeout(timeout)
120+
}
121+
return m
122+
}
123+
116124
// SetJsonEncoder set json encoder
117125
func (m *ModelService) SetJsonEncoder(encoder utils.JSONMarshal) *ModelService {
118126
m.TritonService.SetJSONEncoder(encoder)
@@ -130,55 +138,43 @@ func (m *ModelService) SetJsonDecoder(decoder utils.JSONUnmarshal) *ModelService
130138
//////////////////////////////////////////// Triton Service API Function ////////////////////////////////////////////
131139

132140
// CheckServerReady check server is ready.
133-
func (m *ModelService) CheckServerReady(requestTimeout time.Duration) (bool, error) {
134-
return m.TritonService.CheckServerReady(requestTimeout)
141+
func (m *ModelService) CheckServerReady() (bool, error) {
142+
return m.TritonService.CheckServerReady()
135143
}
136144

137145
// CheckServerAlive check server is alive.
138-
func (m *ModelService) CheckServerAlive(requestTimeout time.Duration) (bool, error) {
139-
return m.TritonService.CheckServerAlive(requestTimeout)
146+
func (m *ModelService) CheckServerAlive() (bool, error) {
147+
return m.TritonService.CheckServerAlive()
140148
}
141149

142150
// CheckModelReady check model is ready.
143-
func (m *ModelService) CheckModelReady(
144-
modelName, modelVersion string, requestTimeout time.Duration,
145-
) (bool, error) {
146-
return m.TritonService.CheckModelReady(modelName, modelVersion, requestTimeout)
151+
func (m *ModelService) CheckModelReady(modelName, modelVersion string) (bool, error) {
152+
return m.TritonService.CheckModelReady(modelName, modelVersion)
147153
}
148154

149155
// GetServerMeta get server meta.
150-
func (m *ModelService) GetServerMeta(
151-
requestTimeout time.Duration,
152-
) (*nvidia_inferenceserver.ServerMetadataResponse, error) {
153-
return m.TritonService.ServerMetadata(requestTimeout)
156+
func (m *ModelService) GetServerMeta() (*nvidia_inferenceserver.ServerMetadataResponse, error) {
157+
return m.TritonService.ServerMetadata()
154158
}
155159

156160
// GetModelMeta get model meta.
157-
func (m *ModelService) GetModelMeta(
158-
modelName, modelVersion string, requestTimeout time.Duration,
159-
) (*nvidia_inferenceserver.ModelMetadataResponse, error) {
160-
return m.TritonService.ModelMetadataRequest(modelName, modelVersion, requestTimeout)
161+
func (m *ModelService) GetModelMeta(modelName, modelVersion string) (*nvidia_inferenceserver.ModelMetadataResponse, error) {
162+
return m.TritonService.ModelMetadataRequest(modelName, modelVersion)
161163
}
162164

163165
// GetAllModelInfo get all model info.
164-
func (m *ModelService) GetAllModelInfo(
165-
repoName string, isReady bool, requestTimeout time.Duration,
166-
) (*nvidia_inferenceserver.RepositoryIndexResponse, error) {
167-
return m.TritonService.ModelIndex(repoName, isReady, requestTimeout)
166+
func (m *ModelService) GetAllModelInfo(repoName string, isReady bool) (*nvidia_inferenceserver.RepositoryIndexResponse, error) {
167+
return m.TritonService.ModelIndex(repoName, isReady)
168168
}
169169

170170
// GetModelConfig get model config.
171-
func (m *ModelService) GetModelConfig(
172-
modelName, modelVersion string, requestTimeout time.Duration,
173-
) (interface{}, error) {
174-
return m.TritonService.ModelConfiguration(modelName, modelVersion, requestTimeout)
171+
func (m *ModelService) GetModelConfig(modelName, modelVersion string) (interface{}, error) {
172+
return m.TritonService.ModelConfiguration(modelName, modelVersion)
175173
}
176174

177175
// GetModelInferStats get model infer stats.
178-
func (m *ModelService) GetModelInferStats(
179-
modelName, modelVersion string, requestTimeout time.Duration,
180-
) (*nvidia_inferenceserver.ModelStatisticsResponse, error) {
181-
return m.TritonService.ModelInferStats(modelName, modelVersion, requestTimeout)
176+
func (m *ModelService) GetModelInferStats(modelName, modelVersion string) (*nvidia_inferenceserver.ModelStatisticsResponse, error) {
177+
return m.TritonService.ModelInferStats(modelName, modelVersion)
182178
}
183179

184180
//////////////////////////////////////////// Triton Service API Function ////////////////////////////////////////////

models/transformers/bert.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package transformers
33
import (
44
"encoding/binary"
55
"strings"
6-
"time"
76

87
"github.com/sunhailin-Leo/triton-service-go/v2/models"
98
"github.com/sunhailin-Leo/triton-service-go/v2/nvidia_inferenceserver"
@@ -263,7 +262,6 @@ func (m *BertModelService) generateGRPCRequest(
263262
func (m *BertModelService) ModelInfer(
264263
inferData []string,
265264
modelName, modelVersion string,
266-
requestTimeout time.Duration,
267265
params ...interface{},
268266
) ([]interface{}, error) {
269267
// Create request input/output tensors
@@ -276,7 +274,7 @@ func (m *BertModelService) ModelInfer(
276274
return nil, utils.ErrEmptyGRPCRequestBody
277275
}
278276
return m.TritonService.ModelGRPCInfer(
279-
inferInputs, inferOutputs, grpcRawInputs, modelName, modelVersion, requestTimeout,
277+
inferInputs, inferOutputs, grpcRawInputs, modelName, modelVersion,
280278
m.InferCallback, m, grpcInputData, params,
281279
)
282280
}
@@ -289,7 +287,7 @@ func (m *BertModelService) ModelInfer(
289287
}
290288
// HTTP Infer
291289
return m.TritonService.ModelHTTPInfer(
292-
httpRequestBody, modelName, modelVersion, requestTimeout,
290+
httpRequestBody, modelName, modelVersion,
293291
m.InferCallback, m, httpInputData, params,
294292
)
295293
}

models/transformers/bert_w2ner.go

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package transformers
22

33
import (
44
"slices"
5-
"time"
65

76
"github.com/sunhailin-Leo/triton-service-go/v2/models"
87
"github.com/sunhailin-Leo/triton-service-go/v2/nvidia_inferenceserver"
@@ -312,7 +311,6 @@ func (w *W2NerModelService) generateGRPCRequest(
312311
func (w *W2NerModelService) ModelInfer(
313312
inferData [][]string,
314313
modelName, modelVersion string,
315-
requestTimeout time.Duration,
316314
params ...interface{},
317315
) ([]interface{}, error) {
318316
// Create request input/output tensors
@@ -325,10 +323,8 @@ func (w *W2NerModelService) ModelInfer(
325323
if grpcRawInputs == nil {
326324
return nil, utils.ErrEmptyGRPCRequestBody
327325
}
328-
return w.TritonService.ModelGRPCInfer(
329-
inferInputs, inferOutputs, grpcRawInputs, modelName, modelVersion, requestTimeout,
330-
w.InferCallback, w, grpcInputData, params,
331-
)
326+
return w.TritonService.ModelGRPCInfer(inferInputs, inferOutputs, grpcRawInputs, modelName, modelVersion,
327+
w.InferCallback, w, grpcInputData, params)
332328
}
333329

334330
httpRequestBody, httpInputData, err := w.generateHTTPRequest(inferData, inferInputs, inferOutputs)
@@ -339,10 +335,8 @@ func (w *W2NerModelService) ModelInfer(
339335
return nil, utils.ErrEmptyHTTPRequestBody
340336
}
341337
// HTTP Infer
342-
return w.TritonService.ModelHTTPInfer(
343-
httpRequestBody, modelName, modelVersion, requestTimeout,
344-
w.InferCallback, w, httpInputData, params,
345-
)
338+
return w.TritonService.ModelHTTPInfer(httpRequestBody, modelName, modelVersion, w.InferCallback,
339+
w, httpInputData, params)
346340
}
347341

348342
//////////////////////////////////////////// Triton Service API Function ////////////////////////////////////////////

0 commit comments

Comments
 (0)