diff --git a/core/bifrost.go b/core/bifrost.go index e72d9c972..371129a08 100644 --- a/core/bifrost.go +++ b/core/bifrost.go @@ -1234,6 +1234,9 @@ func (bifrost *Bifrost) requestWorker(provider schemas.Provider, config *schemas } } + // Save retry count into context + req.Context = context.WithValue(req.Context, schemas.BifrostContextKeyRetryCount, attempts) + if bifrostError != nil { // Add retry information to error if attempts > 0 { diff --git a/core/schemas/bifrost.go b/core/schemas/bifrost.go index 56aff1320..e1b067850 100644 --- a/core/schemas/bifrost.go +++ b/core/schemas/bifrost.go @@ -105,6 +105,7 @@ const ( BifrostContextKeyRequestType BifrostContextKey = "bifrost-request-type" BifrostContextKeyRequestProvider BifrostContextKey = "bifrost-request-provider" BifrostContextKeyRequestModel BifrostContextKey = "bifrost-request-model" + BifrostContextKeyRetryCount BifrostContextKey = "bifrost-retry-count" ) // NOTE: for custom plugin implementation dealing with streaming short circuit, diff --git a/docs/features/telemetry.mdx b/docs/features/telemetry.mdx index dcf790e66..6737ddcee 100644 --- a/docs/features/telemetry.mdx +++ b/docs/features/telemetry.mdx @@ -49,6 +49,7 @@ These metrics track requests forwarded to AI providers: | `bifrost_output_tokens_total` | Counter | Total output tokens received from upstream providers | `provider`, `model`, `method`, custom labels | | `bifrost_cache_hits_total` | Counter | Total cache hits by type (direct/semantic) | `provider`, `model`, `method`, `cache_type`, custom labels | | `bifrost_cost_total` | Counter | Total cost in USD for upstream provider requests | `provider`, `model`, `method`, custom labels | +| `bifrost_retries_total` | Counter | Total number of retries performed before a successful request or fallback | `provider`, `model`, `method`, custom labels | **Label Definitions:** - `provider`: AI provider name (e.g., `openai`, `anthropic`, `azure`) @@ -57,6 +58,7 @@ These metrics track requests forwarded to AI 
providers: - `cache_type`: Cache hit type (`direct`, `semantic`) - only for cache hits metric - `path`: HTTP endpoint path - `status`: HTTP status code +- `retries`: Number of retry attempts (recorded as metric value, not label) --- @@ -123,6 +125,20 @@ rate(bifrost_upstream_requests_total[5m]) * 100 # Errors by model sum by (model) (rate(bifrost_error_requests_total[5m])) +``` + +### Retry Analysis +Track how often retries are happening per provider and model: + +```promql +# Retry rate per provider +rate(bifrost_retries_total[5m]) + +# Retries per request ratio +rate(bifrost_retries_total[5m]) / rate(bifrost_upstream_requests_total[5m]) + +# High retry detection (e.g. >1 retry/request on average) +rate(bifrost_retries_total[5m]) / rate(bifrost_upstream_requests_total[5m]) > 1 ``` --- diff --git a/plugins/telemetry/main.go b/plugins/telemetry/main.go index ce99ceb8f..6860605c4 100644 --- a/plugins/telemetry/main.go +++ b/plugins/telemetry/main.go @@ -42,6 +42,7 @@ type PrometheusPlugin struct { OutputTokensTotal *prometheus.CounterVec CacheHitsTotal *prometheus.CounterVec CostTotal *prometheus.CounterVec + RetriesTotal *prometheus.CounterVec } // NewPrometheusPlugin creates a new PrometheusPlugin with initialized metrics. 
@@ -60,6 +61,7 @@ func Init(pricingManager *pricing.PricingManager, logger schemas.Logger) *Promet OutputTokensTotal: bifrostOutputTokensTotal, CacheHitsTotal: bifrostCacheHitsTotal, CostTotal: bifrostCostTotal, + RetriesTotal: bifrostRetriesTotal, } } @@ -172,6 +174,11 @@ func (p *PrometheusPlugin) PostHook(ctx *context.Context, result *schemas.Bifros p.SuccessRequestsTotal.WithLabelValues(promLabelValues...).Inc() } + // Record retries if available in context + if retries, ok := (*ctx).Value(schemas.BifrostContextKeyRetryCount).(int); ok && retries > 0 { + p.RetriesTotal.WithLabelValues(promLabelValues...).Add(float64(retries)) + } + // Record input and output tokens if result.Usage != nil { p.InputTokensTotal.WithLabelValues(promLabelValues...).Add(float64(result.Usage.PromptTokens)) diff --git a/plugins/telemetry/setup.go b/plugins/telemetry/setup.go index 70ae1ed4e..5c9aa7468 100644 --- a/plugins/telemetry/setup.go +++ b/plugins/telemetry/setup.go @@ -49,6 +49,9 @@ var ( // bifrostCostTotal tracks the total cost in USD for requests to upstream providers bifrostCostTotal *prometheus.CounterVec + // bifrostRetriesTotal tracks the total number of retries performed by Bifrost. 
+ bifrostRetriesTotal *prometheus.CounterVec + // customLabels stores the expected label names in order customLabels []string isInitialized bool @@ -170,6 +173,14 @@ func InitPrometheusMetrics(labels []string) { }, append(bifrostDefaultLabels, labels...), ) + + bifrostRetriesTotal = promauto.NewCounterVec( + prometheus.CounterOpts{ + Name: "bifrost_retries_total", + Help: "Total number of retries performed by Bifrost.", + }, + append(bifrostDefaultLabels, labels...), + ) isInitialized = true } diff --git a/transports/bifrost-http/handlers/completions.go b/transports/bifrost-http/handlers/completions.go index 99c6c518f..db0e3eb83 100644 --- a/transports/bifrost-http/handlers/completions.go +++ b/transports/bifrost-http/handlers/completions.go @@ -427,6 +427,10 @@ func (h *CompletionHandler) transcriptionCompletion(ctx *fasthttp.RequestCtx) { // Make transcription request resp, bifrostErr := h.client.TranscriptionRequest(*bifrostCtx, bifrostReq) + if r := getRetryCount(bifrostCtx); r >= 0 { + ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r)) + } + // Handle response if bifrostErr != nil { SendBifrostError(ctx, bifrostErr, h.logger) @@ -574,6 +578,10 @@ func (h *CompletionHandler) handleRequest(ctx *fasthttp.RequestCtx, completionTy resp, bifrostErr = h.client.SpeechRequest(*bifrostCtx, bifrostReq) } + if r := getRetryCount(bifrostCtx); r >= 0 { + ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r)) + } + // Handle response if bifrostErr != nil { SendBifrostError(ctx, bifrostErr, h.logger) @@ -593,6 +601,11 @@ func (h *CompletionHandler) handleRequest(ctx *fasthttp.RequestCtx, completionTy return } + // // getting (handlers) + if r := getRetryCount(bifrostCtx); r >= 0 { + ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r)) + } + // Send successful response SendJSON(ctx, resp, h.logger) } @@ -666,6 +679,10 @@ func (h *CompletionHandler) handleStreamingChatCompletion(ctx *fasthttp.RequestC return response, true } + if r := getRetryCount(bifrostCtx); r >= 
0 {
+		ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
+	}
+
 	h.handleStreamingResponse(ctx, getStream, extractResponse)
 }
 
@@ -698,5 +715,9 @@ func (h *CompletionHandler) handleStreamingTranscriptionRequest(ctx *fasthttp.Re
 		return response.Transcribe, true
 	}
 
+	if r := getRetryCount(bifrostCtx); r >= 0 {
+		ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
+	}
+
 	h.handleStreamingResponse(ctx, getStream, extractResponse)
 }
diff --git a/transports/bifrost-http/handlers/utils.go b/transports/bifrost-http/handlers/utils.go
index 80ed171c9..29eacf38f 100644
--- a/transports/bifrost-http/handlers/utils.go
+++ b/transports/bifrost-http/handlers/utils.go
@@ -3,6 +3,7 @@
 package handlers
 
 import (
+	"context"
 	"encoding/json"
 	"fmt"
 	"slices"
@@ -110,3 +111,21 @@ func ParseModel(model string) (string, string, error) {
 	}
 	return provider, name, nil
 }
+
+// getRetryCount returns the retry count stored in ctx under
+// schemas.BifrostContextKeyRetryCount. It returns -1 when ctx is nil, when it
+// points to a nil context, or when no int value is stored under that key.
+//
+// NOTE(review): context.WithValue returns a derived context, so the value is
+// only visible here if *ctx is (or descends from) the context it was set on;
+// confirm the handlers' context actually receives it.
+func getRetryCount(ctx *context.Context) int {
+	if ctx == nil || *ctx == nil {
+		// Calling Value on a nil context.Context interface would panic.
+		return -1
+	}
+	if v, ok := (*ctx).Value(schemas.BifrostContextKeyRetryCount).(int); ok {
+		return v
+	}
+	return -1
+}