3 changes: 3 additions & 0 deletions core/bifrost.go
@@ -1234,6 +1234,9 @@ func (bifrost *Bifrost) requestWorker(provider schemas.Provider, config *schemas
}
}

// Save retry count into context
req.Context = context.WithValue(req.Context, schemas.BifrostContextKeyRetryCount, attempts)

if bifrostError != nil {
// Add retry information to error
if attempts > 0 {
1 change: 1 addition & 0 deletions core/schemas/bifrost.go
@@ -105,6 +105,7 @@ const (
BifrostContextKeyRequestType BifrostContextKey = "bifrost-request-type"
BifrostContextKeyRequestProvider BifrostContextKey = "bifrost-request-provider"
BifrostContextKeyRequestModel BifrostContextKey = "bifrost-request-model"
BifrostContextKeyRetryCount BifrostContextKey = "bifrost-retry-count"
)
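
The `requestWorker` change above stores the attempt count on the request context under this new key, and downstream code (plugins, handlers) reads it back with a typed assertion. A minimal, self-contained sketch of that round trip; the key type and constant here are stand-ins for the real `schemas` definitions:

```go
package main

import (
	"context"
	"fmt"
)

// Stand-ins for schemas.BifrostContextKey and BifrostContextKeyRetryCount.
type bifrostContextKey string

const keyRetryCount bifrostContextKey = "bifrost-retry-count"

func main() {
	ctx := context.Background()

	// What requestWorker does after its retry loop: stash the attempt count.
	attempts := 2
	ctx = context.WithValue(ctx, keyRetryCount, attempts)

	// What a plugin or handler does later: read it back with a typed assertion.
	if retries, ok := ctx.Value(keyRetryCount).(int); ok {
		fmt.Println("retries:", retries) // prints "retries: 2"
	}
}
```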

// NOTE: for custom plugin implementation dealing with streaming short circuit,
15 changes: 15 additions & 0 deletions docs/features/telemetry.mdx
@@ -49,6 +49,7 @@ These metrics track requests forwarded to AI providers:
| `bifrost_output_tokens_total` | Counter | Total output tokens received from upstream providers | `provider`, `model`, `method`, custom labels |
| `bifrost_cache_hits_total` | Counter | Total cache hits by type (direct/semantic) | `provider`, `model`, `method`, `cache_type`, custom labels |
| `bifrost_cost_total` | Counter | Total cost in USD for upstream provider requests | `provider`, `model`, `method`, custom labels |
| `bifrost_retries_total` | Counter | Total number of retries performed before a successful request or fallback | `provider`, `model`, `method`, custom labels |

**Label Definitions:**
- `provider`: AI provider name (e.g., `openai`, `anthropic`, `azure`)
@@ -57,6 +58,7 @@ These metrics track requests forwarded to AI providers:
- `cache_type`: Cache hit type (`direct`, `semantic`) - only for cache hits metric
- `path`: HTTP endpoint path
- `status`: HTTP status code
- `retries`: Number of retry attempts; not a label, but recorded as the value added to `bifrost_retries_total`

---

@@ -123,6 +125,19 @@ rate(bifrost_upstream_requests_total[5m]) * 100

# Errors by model
sum by (model) (rate(bifrost_error_requests_total[5m]))
```

### Retry Analysis

Track how often retries happen per provider and model:

```promql
# Retry rate per provider
rate(bifrost_retries_total[5m])

# Retries per request ratio
rate(bifrost_retries_total[5m]) / rate(bifrost_upstream_requests_total[5m])

# High retry detection (e.g. >1 retry/request on average)
rate(bifrost_retries_total[5m]) / rate(bifrost_upstream_requests_total[5m]) > 1
```

---
7 changes: 7 additions & 0 deletions plugins/telemetry/main.go
@@ -42,6 +42,7 @@ type PrometheusPlugin struct {
OutputTokensTotal *prometheus.CounterVec
CacheHitsTotal *prometheus.CounterVec
CostTotal *prometheus.CounterVec
RetriesTotal *prometheus.CounterVec
}

// NewPrometheusPlugin creates a new PrometheusPlugin with initialized metrics.
@@ -60,6 +61,7 @@ func Init(pricingManager *pricing.PricingManager, logger schemas.Logger) *Promet
OutputTokensTotal: bifrostOutputTokensTotal,
CacheHitsTotal: bifrostCacheHitsTotal,
CostTotal: bifrostCostTotal,
RetriesTotal: bifrostRetriesTotal,
}
}

@@ -172,6 +174,11 @@ func (p *PrometheusPlugin) PostHook(ctx *context.Context, result *schemas.Bifros
p.SuccessRequestsTotal.WithLabelValues(promLabelValues...).Inc()
}

// Record retries if available in context
if retries, ok := (*ctx).Value(schemas.BifrostContextKeyRetryCount).(int); ok && retries > 0 {
p.RetriesTotal.WithLabelValues(promLabelValues...).Add(float64(retries))
}

// Record input and output tokens
if result.Usage != nil {
p.InputTokensTotal.WithLabelValues(promLabelValues...).Add(float64(result.Usage.PromptTokens))
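
The retry recording above is a plain labelled counter add, so the pattern can be checked in isolation with the `testutil` helpers from `prometheus/client_golang`. A hedged sketch with made-up label values, not the plugin's real label set:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// A stand-alone counter with the same shape as bifrost_retries_total.
	retries := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "bifrost_retries_total",
			Help: "Total number of retries performed by Bifrost.",
		},
		[]string{"provider", "model", "method"},
	)

	// Same pattern as PostHook: add this request's retry count to the counter.
	retries.WithLabelValues("openai", "gpt-4o", "chat").Add(3)

	// testutil.ToFloat64 reads the current value of a single metric child.
	fmt.Println(testutil.ToFloat64(retries.WithLabelValues("openai", "gpt-4o", "chat"))) // 3
}
```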
11 changes: 11 additions & 0 deletions plugins/telemetry/setup.go
@@ -49,6 +49,9 @@ var (
// bifrostCostTotal tracks the total cost in USD for requests to upstream providers
bifrostCostTotal *prometheus.CounterVec

// bifrostRetriesTotal tracks the total number of retries performed by Bifrost.
bifrostRetriesTotal *prometheus.CounterVec

// customLabels stores the expected label names in order
customLabels []string
isInitialized bool
@@ -170,6 +173,14 @@ func InitPrometheusMetrics(labels []string) {
},
append(bifrostDefaultLabels, labels...),
)

bifrostRetriesTotal = promauto.NewCounterVec(

Collaborator: Can we have the retries count as a Histogram metric? It would be much better for analytics.

Author: Yes, I will add the retries count as a histogram metric.

prometheus.CounterOpts{
Name: "bifrost_retries_total",
Help: "Total number of retries performed by Bifrost.",
},
append(bifrostDefaultLabels, labels...),
)

isInitialized = true
}
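
Following up on the review thread above: a histogram variant could sit alongside the counter registration in this file. A rough sketch only, not part of this PR; the metric name and bucket boundaries are illustrative, and `bifrostDefaultLabels`/`labels` follow the same pattern as the existing metrics:

```go
// registerRetryHistogram is a hypothetical companion to InitPrometheusMetrics,
// registering a histogram for retry attempts as suggested in review.
func registerRetryHistogram(labels []string) *prometheus.HistogramVec {
	return promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "bifrost_retry_attempts",
			Help:    "Distribution of retry attempts per upstream request.",
			Buckets: []float64{0, 1, 2, 3, 5, 8}, // illustrative boundaries
		},
		append(bifrostDefaultLabels, labels...),
	)
}

// In PostHook, the observation would mirror the counter update:
// retryHistogram.WithLabelValues(promLabelValues...).Observe(float64(retries))
```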
21 changes: 21 additions & 0 deletions transports/bifrost-http/handlers/completions.go
@@ -427,6 +427,10 @@ func (h *CompletionHandler) transcriptionCompletion(ctx *fasthttp.RequestCtx) {
// Make transcription request
resp, bifrostErr := h.client.TranscriptionRequest(*bifrostCtx, bifrostReq)

if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

// Handle response
if bifrostErr != nil {
SendBifrostError(ctx, bifrostErr, h.logger)
@@ -574,6 +578,10 @@ func (h *CompletionHandler) handleRequest(ctx *fasthttp.RequestCtx, completionTy
resp, bifrostErr = h.client.SpeechRequest(*bifrostCtx, bifrostReq)
}

if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

// Handle response
if bifrostErr != nil {
SendBifrostError(ctx, bifrostErr, h.logger)
@@ -593,6 +601,11 @@ func (h *CompletionHandler) handleRequest(ctx *fasthttp.RequestCtx, completionTy
return
}

// Expose the retry count from the request context on the response header
if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

// Send successful response
SendJSON(ctx, resp, h.logger)
}
@@ -666,6 +679,10 @@ func (h *CompletionHandler) handleStreamingChatCompletion(ctx *fasthttp.RequestC
return response, true
}

if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

h.handleStreamingResponse(ctx, getStream, extractResponse)
}

@@ -698,5 +715,9 @@ func (h *CompletionHandler) handleStreamingTranscriptionRequest(ctx *fasthttp.Re
return response.Transcribe, true
}

if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

h.handleStreamingResponse(ctx, getStream, extractResponse)
}
10 changes: 10 additions & 0 deletions transports/bifrost-http/handlers/utils.go
@@ -3,6 +3,7 @@
package handlers

import (
"context"
"encoding/json"
"fmt"
"slices"
@@ -110,3 +111,12 @@ func ParseModel(model string) (string, string, error) {
}
return provider, name, nil
}

func getRetryCount(ctx *context.Context) int {
if ctx != nil {
if v, ok := (*ctx).Value(schemas.BifrostContextKeyRetryCount).(int); ok {
return v
}
}
return -1
}
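
From the client side, the new `x-bf-retries` header only needs a look at the response headers. A small hedged example against a hypothetical local gateway; the address, route, and request body are illustrative and not taken from this PR:

```go
package main

import (
	"fmt"
	"net/http"
	"strings"
)

func main() {
	// Hypothetical gateway address and route; adjust to your deployment.
	body := `{"model": "openai/gpt-4o-mini", "messages": [{"role": "user", "content": "hi"}]}`
	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", strings.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The handlers above only set the header when a retry count was found in context.
	fmt.Println("x-bf-retries:", resp.Header.Get("x-bf-retries"))
}
```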