3 changes: 3 additions & 0 deletions core/bifrost.go
@@ -1234,6 +1234,9 @@ func (bifrost *Bifrost) requestWorker(provider schemas.Provider, config *schemas
}
}

// Save retry count into context
req.Context = context.WithValue(req.Context, schemas.BifrostContextKeyRetryCount, attempts)

if bifrostError != nil {
// Add retry information to error
if attempts > 0 {
1 change: 1 addition & 0 deletions core/schemas/bifrost.go
@@ -105,6 +105,7 @@ const (
BifrostContextKeyRequestType BifrostContextKey = "bifrost-request-type"
BifrostContextKeyRequestProvider BifrostContextKey = "bifrost-request-provider"
BifrostContextKeyRequestModel BifrostContextKey = "bifrost-request-model"
BifrostContextKeyRetryCount BifrostContextKey = "bifrost-retry-count"
)
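
The `requestWorker` change above stores the attempt count on the request context under this new key, and downstream code (plugins, handlers) reads it back with a typed assertion. A minimal, self-contained sketch of that round trip; the key type and constant here are stand-ins for the real `schemas` definitions:

```go
package main

import (
	"context"
	"fmt"
)

// Stand-ins for schemas.BifrostContextKey and BifrostContextKeyRetryCount.
type bifrostContextKey string

const keyRetryCount bifrostContextKey = "bifrost-retry-count"

func main() {
	ctx := context.Background()

	// What requestWorker does after its retry loop: stash the attempt count.
	attempts := 2
	ctx = context.WithValue(ctx, keyRetryCount, attempts)

	// What a plugin or handler does later: read it back with a typed assertion.
	if retries, ok := ctx.Value(keyRetryCount).(int); ok {
		fmt.Println("retries:", retries) // prints "retries: 2"
	}
}
```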

// NOTE: for custom plugin implementation dealing with streaming short circuit,
15 changes: 15 additions & 0 deletions docs/features/telemetry.mdx
@@ -49,6 +49,7 @@ These metrics track requests forwarded to AI providers:
| `bifrost_output_tokens_total` | Counter | Total output tokens received from upstream providers | `provider`, `model`, `method`, custom labels |
| `bifrost_cache_hits_total` | Counter | Total cache hits by type (direct/semantic) | `provider`, `model`, `method`, `cache_type`, custom labels |
| `bifrost_cost_total` | Counter | Total cost in USD for upstream provider requests | `provider`, `model`, `method`, custom labels |
| `bifrost_retries_total` | Counter | Total number of retries performed before a successful request or fallback | `provider`, `model`, `method`, custom labels |

**Label Definitions:**
- `provider`: AI provider name (e.g., `openai`, `anthropic`, `azure`)
@@ -57,6 +58,7 @@ These metrics track requests forwarded to AI providers:
- `cache_type`: Cache hit type (`direct`, `semantic`) - only for cache hits metric
- `path`: HTTP endpoint path
- `status`: HTTP status code
- `retries`: Number of retry attempts; not a label, but recorded as the value added to `bifrost_retries_total`

---

@@ -123,6 +125,19 @@ rate(bifrost_upstream_requests_total[5m]) * 100

# Errors by model
sum by (model) (rate(bifrost_error_requests_total[5m]))
```

### Retry Analysis

Track how often retries happen per provider and model:

```promql
# Retry rate per provider
rate(bifrost_retries_total[5m])

# Retries per request ratio
rate(bifrost_retries_total[5m]) / rate(bifrost_upstream_requests_total[5m])

# High retry detection (e.g. >1 retry/request on average)
rate(bifrost_retries_total[5m]) / rate(bifrost_upstream_requests_total[5m]) > 1
```

---
7 changes: 7 additions & 0 deletions plugins/telemetry/main.go
@@ -42,6 +42,7 @@ type PrometheusPlugin struct {
OutputTokensTotal *prometheus.CounterVec
CacheHitsTotal *prometheus.CounterVec
CostTotal *prometheus.CounterVec
RetriesTotal *prometheus.CounterVec
}

// NewPrometheusPlugin creates a new PrometheusPlugin with initialized metrics.
@@ -60,6 +61,7 @@ func Init(pricingManager *pricing.PricingManager, logger schemas.Logger) *Promet
OutputTokensTotal: bifrostOutputTokensTotal,
CacheHitsTotal: bifrostCacheHitsTotal,
CostTotal: bifrostCostTotal,
RetriesTotal: bifrostRetriesTotal,
}
}

@@ -172,6 +174,11 @@ func (p *PrometheusPlugin) PostHook(ctx *context.Context, result *schemas.Bifros
p.SuccessRequestsTotal.WithLabelValues(promLabelValues...).Inc()
}

// Record retries if available in context
if retries, ok := (*ctx).Value(schemas.BifrostContextKeyRetryCount).(int); ok && retries > 0 {
p.RetriesTotal.WithLabelValues(promLabelValues...).Add(float64(retries))
}

// Record input and output tokens
if result.Usage != nil {
p.InputTokensTotal.WithLabelValues(promLabelValues...).Add(float64(result.Usage.PromptTokens))
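
The retry recording above is a plain labelled counter add, so the pattern can be checked in isolation with the `testutil` helpers from `prometheus/client_golang`. A hedged sketch with made-up label values, not the plugin's real label set:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// A stand-alone counter with the same shape as bifrost_retries_total.
	retries := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "bifrost_retries_total",
			Help: "Total number of retries performed by Bifrost.",
		},
		[]string{"provider", "model", "method"},
	)

	// Same pattern as PostHook: add this request's retry count to the counter.
	retries.WithLabelValues("openai", "gpt-4o", "chat").Add(3)

	// testutil.ToFloat64 reads the current value of a single metric child.
	fmt.Println(testutil.ToFloat64(retries.WithLabelValues("openai", "gpt-4o", "chat"))) // 3
}
```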
11 changes: 11 additions & 0 deletions plugins/telemetry/setup.go
@@ -49,6 +49,9 @@ var (
// bifrostCostTotal tracks the total cost in USD for requests to upstream providers
bifrostCostTotal *prometheus.CounterVec

// bifrostRetriesTotal tracks the total number of retries performed by Bifrost.
bifrostRetriesTotal *prometheus.CounterVec

// customLabels stores the expected label names in order
customLabels []string
isInitialized bool
@@ -170,6 +173,14 @@ func InitPrometheusMetrics(labels []string) {
},
append(bifrostDefaultLabels, labels...),
)

bifrostRetriesTotal = promauto.NewCounterVec(

Collaborator: Can we have the retries count as a Histogram metric? It would be much better for analytics.

Author: Yes, I will add the retries count as a histogram metric.

prometheus.CounterOpts{
Name: "bifrost_retries_total",
Help: "Total number of retries performed by Bifrost.",
},
append(bifrostDefaultLabels, labels...),
)

isInitialized = true
}
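
Following up on the review thread above: a histogram variant could sit alongside the counter registration in this file. A rough sketch only, not part of this PR; the metric name and bucket boundaries are illustrative, and `bifrostDefaultLabels`/`labels` follow the same pattern as the existing metrics:

```go
// registerRetryHistogram is a hypothetical companion to InitPrometheusMetrics,
// registering a histogram for retry attempts as suggested in review.
func registerRetryHistogram(labels []string) *prometheus.HistogramVec {
	return promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "bifrost_retry_attempts",
			Help:    "Distribution of retry attempts per upstream request.",
			Buckets: []float64{0, 1, 2, 3, 5, 8}, // illustrative boundaries
		},
		append(bifrostDefaultLabels, labels...),
	)
}

// In PostHook, the observation would mirror the counter update:
// retryHistogram.WithLabelValues(promLabelValues...).Observe(float64(retries))
```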
21 changes: 21 additions & 0 deletions transports/bifrost-http/handlers/completions.go
@@ -427,6 +427,10 @@ func (h *CompletionHandler) transcriptionCompletion(ctx *fasthttp.RequestCtx) {
// Make transcription request
resp, bifrostErr := h.client.TranscriptionRequest(*bifrostCtx, bifrostReq)

if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

// Handle response
if bifrostErr != nil {
SendBifrostError(ctx, bifrostErr, h.logger)
@@ -574,6 +578,10 @@ func (h *CompletionHandler) handleRequest(ctx *fasthttp.RequestCtx, completionTy
resp, bifrostErr = h.client.SpeechRequest(*bifrostCtx, bifrostReq)
}

if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

// Handle response
if bifrostErr != nil {
SendBifrostError(ctx, bifrostErr, h.logger)
@@ -593,6 +601,11 @@ func (h *CompletionHandler) handleRequest(ctx *fasthttp.RequestCtx, completionTy
return
}

// Expose the retry count from the request context on the response header
if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

// Send successful response
SendJSON(ctx, resp, h.logger)
}
@@ -666,6 +679,10 @@ func (h *CompletionHandler) handleStreamingChatCompletion(ctx *fasthttp.RequestC
return response, true
}

if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

h.handleStreamingResponse(ctx, getStream, extractResponse)
}

@@ -698,5 +715,9 @@ func (h *CompletionHandler) handleStreamingTranscriptionRequest(ctx *fasthttp.Re
return response.Transcribe, true
}

if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

h.handleStreamingResponse(ctx, getStream, extractResponse)
}
10 changes: 10 additions & 0 deletions transports/bifrost-http/handlers/utils.go
@@ -3,6 +3,7 @@
package handlers

import (
"context"
"encoding/json"
"fmt"
"slices"
@@ -110,3 +111,12 @@ func ParseModel(model string) (string, string, error) {
}
return provider, name, nil
}

func getRetryCount(ctx *context.Context) int {
if ctx != nil {
if v, ok := (*ctx).Value(schemas.BifrostContextKeyRetryCount).(int); ok {
return v
}
}
return -1
}
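
From the client side, the new `x-bf-retries` header only needs a look at the response headers. A small hedged example against a hypothetical local gateway; the address, route, and request body are illustrative and not taken from this PR:

```go
package main

import (
	"fmt"
	"net/http"
	"strings"
)

func main() {
	// Hypothetical gateway address and route; adjust to your deployment.
	body := `{"model": "openai/gpt-4o-mini", "messages": [{"role": "user", "content": "hi"}]}`
	resp, err := http.Post("http://localhost:8080/v1/chat/completions", "application/json", strings.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The handlers above only set the header when a retry count was found in context.
	fmt.Println("x-bf-retries:", resp.Header.Get("x-bf-retries"))
}
```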