Skip to content

Commit 4dc88a0

Browse files
author
sunhailin
committed
update version 2.0.4
1 parent d418c7a commit 4dc88a0

File tree

6 files changed

+130
-0
lines changed

6 files changed

+130
-0
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,11 @@ func main() {
146146

147147
### Version
148148

149+
* version 2.0.4 - 2024/07/09
150+
* Fix `W2NER` input feature problem (missing `MaxSeqLength` config).
151+
* Code style fix. Reducing nil cases
152+
* Add `utils.StringSliceTruncatePrecisely` function to handle truncation of `[][]string` data.
153+
149154
* version 2.0.3 - 2024/07/08
150155
* Fix `w2ner.pieces2word` nil slice caused infer error.
151156

models/transformers/bert_w2ner.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ func (w *W2NerModelService) getBertInputFeature(batchInferData [][]string) []*W2
8383
for j, token := range inferData {
8484
tokens[j] = w.getTokenizerResult(token)
8585
}
86+
// The minus 2 is due to the retention of the CLS and SEP positions.
87+
tokens = utils.StringSliceTruncatePrecisely(tokens, w.MaxSeqLength-2)
8688
batchInferTokens[i] = tokens
8789
batchInferPieces[i] = utils.Flatten2DSlice(tokens)
8890
batchInputFeatures[i] = &W2NERInputFeature{}

nvidia_inferenceserver/triton_service_interface.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"encoding/json"
66
"errors"
7+
"net/http"
78
"strconv"
89
"time"
910

@@ -291,7 +292,14 @@ func (t *TritonClientService) ModelHTTPInfer(
291292
timeout)
292293
defer fasthttp.ReleaseResponse(modelInferResponse)
293294

295+
if modelInferResponse == nil {
296+
return nil, t.httpErrorHandler(http.StatusInternalServerError, errors.New("modelInferResponse is nil"))
297+
}
298+
294299
if inferErr != nil || modelInferResponse.StatusCode() != fasthttp.StatusOK {
300+
if inferErr == nil && modelInferResponse.Body() != nil {
301+
inferErr = errors.New("Triton error resp: " + string(modelInferResponse.Body()))
302+
}
295303
return nil, t.httpErrorHandler(modelInferResponse.StatusCode(), inferErr)
296304
}
297305
// decode Result.
@@ -341,6 +349,9 @@ func (t *TritonClientService) CheckServerAlive(timeout time.Duration) (bool, err
341349
}
342350
apiResp, httpErr := t.makeHTTPPostRequestWithDoTimeout(t.getServerURL()+TritonAPIForServerIsLive, nil, timeout)
343351
defer fasthttp.ReleaseResponse(apiResp)
352+
if apiResp == nil {
353+
return false, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
354+
}
344355
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
345356
return false, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
346357
}
@@ -362,6 +373,9 @@ func (t *TritonClientService) CheckServerReady(timeout time.Duration) (bool, err
362373
}
363374
apiResp, httpErr := t.makeHTTPPostRequestWithDoTimeout(t.getServerURL()+TritonAPIForServerIsReady, nil, timeout)
364375
defer fasthttp.ReleaseResponse(apiResp)
376+
if apiResp == nil {
377+
return false, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
378+
}
365379
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
366380
return false, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
367381
}
@@ -386,6 +400,9 @@ func (t *TritonClientService) CheckModelReady(modelName, modelVersion string, ti
386400
t.getServerURL()+TritonAPIForModelPrefix+modelName+TritonAPIForModelVersionPrefix+modelVersion+"/ready",
387401
nil, timeout)
388402
defer fasthttp.ReleaseResponse(apiResp)
403+
if apiResp == nil {
404+
return false, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
405+
}
389406
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
390407
return false, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
391408
}
@@ -404,6 +421,9 @@ func (t *TritonClientService) ServerMetadata(timeout time.Duration) (*ServerMeta
404421
}
405422
apiResp, httpErr := t.makeHTTPPostRequestWithDoTimeout(t.getServerURL()+TritonAPIPrefix, nil, timeout)
406423
defer fasthttp.ReleaseResponse(apiResp)
424+
if apiResp == nil {
425+
return nil, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
426+
}
407427
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
408428
return nil, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
409429
}
@@ -431,6 +451,9 @@ func (t *TritonClientService) ModelMetadataRequest(
431451
t.getServerURL()+TritonAPIForModelPrefix+modelName+TritonAPIForModelVersionPrefix+modelVersion,
432452
nil, timeout)
433453
defer fasthttp.ReleaseResponse(apiResp)
454+
if apiResp == nil {
455+
return nil, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
456+
}
434457
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
435458
return nil, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
436459
}
@@ -460,6 +483,9 @@ func (t *TritonClientService) ModelIndex(
460483
}
461484
apiResp, httpErr := t.makeHTTPPostRequestWithDoTimeout(t.getServerURL()+TritonAPIForRepoIndex, reqBody, timeout)
462485
defer fasthttp.ReleaseResponse(apiResp)
486+
if apiResp == nil {
487+
return nil, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
488+
}
463489
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
464490
return nil, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
465491
}
@@ -486,6 +512,9 @@ func (t *TritonClientService) ModelConfiguration(
486512
t.getServerURL()+TritonAPIForModelPrefix+modelName+
487513
TritonAPIForModelVersionPrefix+modelVersion+"/config", timeout)
488514
defer fasthttp.ReleaseResponse(apiResp)
515+
if apiResp == nil {
516+
return nil, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
517+
}
489518
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
490519
return nil, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
491520
}
@@ -512,6 +541,9 @@ func (t *TritonClientService) ModelInferStats(
512541
t.getServerURL()+TritonAPIForModelPrefix+modelName+TritonAPIForModelVersionPrefix+modelVersion+"/stats",
513542
timeout)
514543
defer fasthttp.ReleaseResponse(apiResp)
544+
if apiResp == nil {
545+
return nil, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
546+
}
515547
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
516548
return nil, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
517549
}
@@ -532,6 +564,9 @@ func (t *TritonClientService) ModelLoadWithHTTP(
532564
apiResp, httpErr := t.makeHTTPPostRequestWithDoTimeout(
533565
t.getServerURL()+TritonAPIForRepoModelPrefix+modelName+"/load", modelConfigBody, timeout)
534566
defer fasthttp.ReleaseResponse(apiResp)
567+
if apiResp == nil {
568+
return nil, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
569+
}
535570
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
536571
return nil, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
537572
}
@@ -564,6 +599,10 @@ func (t *TritonClientService) ModelUnloadWithHTTP(
564599
) (*RepositoryModelUnloadResponse, error) {
565600
apiResp, httpErr := t.makeHTTPPostRequestWithDoTimeout(
566601
t.getServerURL()+TritonAPIForRepoModelPrefix+modelName+"/unload", modelConfigBody, timeout)
602+
defer fasthttp.ReleaseResponse(apiResp)
603+
if apiResp == nil {
604+
return nil, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
605+
}
567606
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
568607
return nil, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
569608
}
@@ -624,6 +663,9 @@ func (t *TritonClientService) ShareMemoryStatus(
624663
}
625664
apiResp, httpErr := t.makeHTTPGetRequestWithDoTimeout(uri, timeout)
626665
defer fasthttp.ReleaseResponse(apiResp)
666+
if apiResp == nil {
667+
return false, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
668+
}
627669
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
628670
return nil, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
629671
}
@@ -669,6 +711,9 @@ func (t *TritonClientService) ShareCUDAMemoryRegister(
669711
apiResp, httpErr := t.makeHTTPPostRequestWithDoTimeout(
670712
t.getServerURL()+TritonAPIForCudaMemoryRegionPrefix+regionName+"/register", reqBody, timeout)
671713
defer fasthttp.ReleaseResponse(apiResp)
714+
if apiResp == nil {
715+
return nil, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
716+
}
672717
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
673718
return nil, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
674719
}
@@ -695,6 +740,9 @@ func (t *TritonClientService) ShareCUDAMemoryUnRegister(
695740
apiResp, httpErr := t.makeHTTPPostRequestWithDoTimeout(
696741
t.getServerURL()+TritonAPIForCudaMemoryRegionPrefix+regionName+"/unregister", nil, timeout)
697742
defer fasthttp.ReleaseResponse(apiResp)
743+
if apiResp == nil {
744+
return nil, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
745+
}
698746
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
699747
return nil, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
700748
}
@@ -732,6 +780,9 @@ func (t *TritonClientService) ShareSystemMemoryRegister(
732780
apiResp, httpErr := t.makeHTTPPostRequestWithDoTimeout(
733781
t.getServerURL()+TritonAPIForSystemMemoryRegionPrefix+regionName+"/register", reqBody, timeout)
734782
defer fasthttp.ReleaseResponse(apiResp)
783+
if apiResp == nil {
784+
return nil, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
785+
}
735786
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
736787
return nil, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
737788
}
@@ -758,6 +809,9 @@ func (t *TritonClientService) ShareSystemMemoryUnRegister(
758809
apiResp, httpErr := t.makeHTTPPostRequestWithDoTimeout(
759810
t.getServerURL()+TritonAPIForSystemMemoryRegionPrefix+regionName+"/unregister", nil, timeout)
760811
defer fasthttp.ReleaseResponse(apiResp)
812+
if apiResp == nil {
813+
return nil, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
814+
}
761815
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
762816
return nil, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
763817
}
@@ -784,6 +838,9 @@ func (t *TritonClientService) GetModelTracingSetting(
784838
apiResp, httpErr := t.makeHTTPGetRequestWithDoTimeout(
785839
t.getServerURL()+TritonAPIForModelPrefix+modelName+"/trace/setting", timeout)
786840
defer fasthttp.ReleaseResponse(apiResp)
841+
if apiResp == nil {
842+
return nil, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
843+
}
787844
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
788845
return nil, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
789846
}
@@ -816,6 +873,9 @@ func (t *TritonClientService) SetModelTracingSetting(
816873
apiResp, httpErr := t.makeHTTPPostRequestWithDoTimeout(
817874
t.getServerURL()+TritonAPIForModelPrefix+modelName+"/trace/setting", reqBody, timeout)
818875
defer fasthttp.ReleaseResponse(apiResp)
876+
if apiResp == nil {
877+
return nil, t.httpErrorHandler(http.StatusInternalServerError, utils.ErrApiRespNil)
878+
}
819879
if httpErr != nil || apiResp.StatusCode() != fasthttp.StatusOK {
820880
return nil, t.httpErrorHandler(apiResp.StatusCode(), httpErr)
821881
}

test/slice_test.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,25 @@ func TestGenerateRange(t *testing.T) {
102102
t.Errorf("Test case 1 failed. Expected %v, got %v", expected1, result1)
103103
}
104104
}
105+
106+
func TestRemoveOldestElements(t *testing.T) {
107+
input := [][]string{
108+
{"a", "b", "c"},
109+
{"d", "e", "f", "g"},
110+
{"h", "i"},
111+
{"j", "k", "l", "m", "n"},
112+
{"o", "p", "q", "r", "s", "t", "u", "v", "w"},
113+
{"x", "y", "z"},
114+
}
115+
testMaxLen := 12
116+
result := utils.StringSliceTruncatePrecisely(input, testMaxLen)
117+
expected := [][]string{
118+
{"a", "b", "c"},
119+
{"d", "e", "f", "g"},
120+
{"h", "i"},
121+
{"j", "k", "l"},
122+
}
123+
if !reflect.DeepEqual(result, expected) {
124+
t.Errorf("Test case 1 failed. Expected %v, got %v", expected, result)
125+
}
126+
}

utils/const.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ const (
2828
)
2929

3030
var (
31+
ErrApiRespNil = errors.New("apiResp is nil") // empty http response body.
3132
ErrEmptyVocab = errors.New("empty vocab") // empty vocab error.
3233
ErrEmptyCallbackFunc = errors.New("callback function is nil") // empty callback function.
3334
ErrEmptyHTTPRequestBody = errors.New("http request body is nil") // empty http request body.

utils/slice.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,3 +122,43 @@ func GenerateRange[T IntNumeric](start, end int) []T {
122122

123123
return result
124124
}
125+
126+
// StringSliceTruncatePrecisely truncates a two-dimensional string slice so that
// the total number of elements across all sub-slices does not exceed maxLen.
// Truncation is controlled at sub-element granularity, which makes it more
// precise than StringSliceTruncate.
//
// Elements are dropped from the end: trailing sub-slices are removed whole
// while their length does not exceed the number of elements still to be
// dropped, and the last surviving sub-slice is shortened when only part of it
// has to go.
//
// A negative maxLen is treated as 0, so the result is empty instead of the
// function indexing past the start of slices. When the input already fits,
// it is returned unchanged. The caller's outer slice is never written to;
// when a sub-slice must be shortened a new outer slice is allocated, but the
// returned sub-slices still share their string storage with the input.
func StringSliceTruncatePrecisely(slices [][]string, maxLen int) [][]string {
	if maxLen < 0 {
		maxLen = 0
	}

	// Count the total number of elements.
	totalLen := 0
	for _, slice := range slices {
		totalLen += len(slice)
	}

	// Nothing to drop.
	if totalLen <= maxLen {
		return slices
	}

	// Walk backwards from the last sub-slice, dropping elements until the
	// surplus is gone. Because maxLen >= 0, removeCount never exceeds
	// totalLen; the end > 0 guard is purely defensive.
	removeCount := totalLen - maxLen
	end := len(slices)
	for end > 0 && removeCount > 0 {
		last := slices[end-1]
		if len(last) > removeCount {
			// Only part of this sub-slice has to go. Copy the outer slice so
			// the caller's data is not modified, and cap the shortened
			// sub-slice so a later append cannot overwrite the dropped tail.
			keep := len(last) - removeCount
			result := make([][]string, end)
			copy(result, slices[:end])
			result[end-1] = last[:keep:keep]
			return result
		}
		// The whole sub-slice fits inside the surplus: drop it entirely.
		removeCount -= len(last)
		end--
	}

	return slices[:end]
}

0 commit comments

Comments
 (0)