@@ -13,6 +13,7 @@ import (
1313 "net/http"
1414 "strings"
1515 "sync"
16+ "time"
1617
1718 "golang.org/x/oauth2/google"
1819
@@ -245,6 +246,8 @@ func (provider *VertexProvider) ChatCompletion(ctx context.Context, key schemas.
245246 return nil , newBifrostOperationError ("error creating auth client" , err , schemas .Vertex )
246247 }
247248
249+ startTime := time .Now ()
250+
248251 // Make request
249252 resp , err := client .Do (req )
250253 if err != nil {
@@ -267,6 +270,8 @@ func (provider *VertexProvider) ChatCompletion(ctx context.Context, key schemas.
267270 }
268271 defer resp .Body .Close ()
269272
273+ latency := time .Since (startTime )
274+
270275 // Handle error response
271276 // Read response body
272277 body , err := io .ReadAll (resp .Body )
@@ -314,6 +319,7 @@ func (provider *VertexProvider) ChatCompletion(ctx context.Context, key schemas.
314319 RequestType : schemas .ChatCompletionRequest ,
315320 Provider : schemas .Vertex ,
316321 ModelRequested : request .Model ,
322+ Latency : latency .Milliseconds (),
317323 }
318324
319325 if provider .sendBackRawResponse {
@@ -322,10 +328,7 @@ func (provider *VertexProvider) ChatCompletion(ctx context.Context, key schemas.
322328
323329 return response , nil
324330 } else {
325- // Pre-allocate response structs from pools
326- // response := acquireOpenAIResponse()
327331 response := & schemas.BifrostChatResponse {}
328- // defer releaseOpenAIResponse(response)
329332
330333 // Use enhanced response handler with pre-allocated response
331334 rawResponse , bifrostErr := handleProviderResponse (body , response , provider .sendBackRawResponse )
@@ -336,6 +339,7 @@ func (provider *VertexProvider) ChatCompletion(ctx context.Context, key schemas.
336339 response .ExtraFields .RequestType = schemas .ChatCompletionRequest
337340 response .ExtraFields .Provider = schemas .Vertex
338341 response .ExtraFields .ModelRequested = request .Model
342+ response .ExtraFields .Latency = latency .Milliseconds ()
339343
340344 if provider .sendBackRawResponse {
341345 response .ExtraFields .RawResponse = rawResponse
@@ -484,22 +488,15 @@ func (provider *VertexProvider) Embedding(ctx context.Context, key schemas.Key,
484488 return nil , newConfigurationError ("embedding input texts are empty" , schemas .Vertex )
485489 }
486490
487- // All Vertex AI embedding models use the same native Vertex embedding API
488- return provider .handleVertexEmbedding (ctx , request .Model , key , reqBody , request .Params )
489- }
490-
491- // handleVertexEmbedding handles embedding requests using Vertex's native embedding API
492- // This is used for all Vertex AI embedding models as they all use the same response format
493- func (provider * VertexProvider ) handleVertexEmbedding (ctx context.Context , model string , key schemas.Key , vertexReq * vertex.VertexEmbeddingRequest , params * schemas.EmbeddingParameters ) (* schemas.BifrostEmbeddingResponse , * schemas.BifrostError ) {
494491 // Use the typed request directly
495- jsonBody , err := sonic .Marshal (vertexReq )
492+ jsonBody , err := sonic .Marshal (reqBody )
496493 if err != nil {
497494 return nil , newBifrostOperationError (schemas .ErrProviderJSONMarshaling , err , schemas .Vertex )
498495 }
499496
500497 // Build the native Vertex embedding API endpoint
501498 url := fmt .Sprintf ("https://%s-aiplatform.googleapis.com/v1/projects/%s/locations/%s/publishers/google/models/%s:predict" ,
502- key .VertexKeyConfig .Region , key .VertexKeyConfig .ProjectID , key .VertexKeyConfig .Region , model )
499+ key .VertexKeyConfig .Region , key .VertexKeyConfig .ProjectID , key .VertexKeyConfig .Region , request . Model )
503500
504501 // Create request
505502 req , err := http .NewRequestWithContext (ctx , "POST" , url , bytes .NewReader (jsonBody ))
@@ -532,6 +529,8 @@ func (provider *VertexProvider) handleVertexEmbedding(ctx context.Context, model
532529 return nil , newBifrostOperationError ("error creating auth client" , err , schemas .Vertex )
533530 }
534531
532+ startTime := time .Now ()
533+
535534 // Make request
536535 resp , err := client .Do (req )
537536 if err != nil {
@@ -554,6 +553,8 @@ func (provider *VertexProvider) handleVertexEmbedding(ctx context.Context, model
554553 }
555554 defer resp .Body .Close ()
556555
556+ latency := time .Since (startTime )
557+
557558 // Handle error response
558559 body , err := io .ReadAll (resp .Body )
559560 if err != nil {
@@ -598,8 +599,9 @@ func (provider *VertexProvider) handleVertexEmbedding(ctx context.Context, model
598599
599600 // Set ExtraFields
600601 bifrostResponse .ExtraFields .Provider = schemas .Vertex
601- bifrostResponse .ExtraFields .ModelRequested = model
602+ bifrostResponse .ExtraFields .ModelRequested = request . Model
602603 bifrostResponse .ExtraFields .RequestType = schemas .EmbeddingRequest
604+ bifrostResponse .ExtraFields .Latency = latency .Milliseconds ()
603605
604606 // Set raw response if enabled
605607 if provider .sendBackRawResponse {
0 commit comments