Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions core/bifrost.go
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,9 @@ func (bifrost *Bifrost) requestWorker(provider schemas.Provider, config *schemas
}
}

// Save retry count into context
req.Context = context.WithValue(req.Context, schemas.BifrostContextKeyRetryCount, attempts)

if bifrostError != nil {
// Add retry information to error
if attempts > 0 {
Expand Down
1 change: 1 addition & 0 deletions core/schemas/bifrost.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ const (
BifrostContextKeyRequestType BifrostContextKey = "bifrost-request-type"
BifrostContextKeyRequestProvider BifrostContextKey = "bifrost-request-provider"
BifrostContextKeyRequestModel BifrostContextKey = "bifrost-request-model"
BifrostContextKeyRetryCount BifrostContextKey = "bifrost-retry-count"
)

// NOTE: for custom plugin implementation dealing with streaming short circuit,
Expand Down
15 changes: 15 additions & 0 deletions docs/features/telemetry.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ These metrics track requests forwarded to AI providers:
| `bifrost_output_tokens_total` | Counter | Total output tokens received from upstream providers | `provider`, `model`, `method`, custom labels |
| `bifrost_cache_hits_total` | Counter | Total cache hits by type (direct/semantic) | `provider`, `model`, `method`, `cache_type`, custom labels |
| `bifrost_cost_total` | Counter | Total cost in USD for upstream provider requests | `provider`, `model`, `method`, custom labels |
| `bifrost_retries_total` | Counter | Total number of retries performed before a successful request or fallback | `provider`, `model`, `method`, custom labels |

**Label Definitions:**
- `provider`: AI provider name (e.g., `openai`, `anthropic`, `azure`)
Expand All @@ -57,6 +58,7 @@ These metrics track requests forwarded to AI providers:
- `cache_type`: Cache hit type (`direct`, `semantic`) - only for cache hits metric
- `path`: HTTP endpoint path
- `status`: HTTP status code
- `retries`: Not a label — the number of retry attempts is recorded as the value of `bifrost_retries_total`

---

Expand Down Expand Up @@ -123,6 +125,19 @@ rate(bifrost_upstream_requests_total[5m]) * 100

# Errors by model
sum by (model) (rate(bifrost_error_requests_total[5m]))
```

### Retry Analysis
Track how often retries are happening per provider and model:

```promql
# Retry rate per provider
sum by (provider) (rate(bifrost_retries_total[5m]))

# Retries per request ratio
rate(bifrost_retries_total[5m]) / rate(bifrost_upstream_requests_total[5m])

# High retry detection (e.g. >1 retry/request on average)
rate(bifrost_retries_total[5m]) / rate(bifrost_upstream_requests_total[5m]) > 1
```

---
Expand Down
7 changes: 7 additions & 0 deletions plugins/telemetry/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ type PrometheusPlugin struct {
OutputTokensTotal *prometheus.CounterVec
CacheHitsTotal *prometheus.CounterVec
CostTotal *prometheus.CounterVec
RetriesTotal *prometheus.CounterVec
}

// NewPrometheusPlugin creates a new PrometheusPlugin with initialized metrics.
Expand All @@ -60,6 +61,7 @@ func Init(pricingManager *pricing.PricingManager, logger schemas.Logger) *Promet
OutputTokensTotal: bifrostOutputTokensTotal,
CacheHitsTotal: bifrostCacheHitsTotal,
CostTotal: bifrostCostTotal,
RetriesTotal: bifrostRetriesTotal,
}
}

Expand Down Expand Up @@ -172,6 +174,11 @@ func (p *PrometheusPlugin) PostHook(ctx *context.Context, result *schemas.Bifros
p.SuccessRequestsTotal.WithLabelValues(promLabelValues...).Inc()
}

// Record retries if available in context
if retries, ok := (*ctx).Value(schemas.BifrostContextKeyRetryCount).(int); ok && retries > 0 {
p.RetriesTotal.WithLabelValues(promLabelValues...).Add(float64(retries))
}

// Record input and output tokens
if result.Usage != nil {
p.InputTokensTotal.WithLabelValues(promLabelValues...).Add(float64(result.Usage.PromptTokens))
Expand Down
11 changes: 11 additions & 0 deletions plugins/telemetry/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ var (
// bifrostCostTotal tracks the total cost in USD for requests to upstream providers
bifrostCostTotal *prometheus.CounterVec

// bifrostRetriesTotal tracks the total number of retries performed by Bifrost.
bifrostRetriesTotal *prometheus.CounterVec

// customLabels stores the expected label names in order
customLabels []string
isInitialized bool
Expand Down Expand Up @@ -170,6 +173,14 @@ func InitPrometheusMetrics(labels []string) {
},
append(bifrostDefaultLabels, labels...),
)

bifrostRetriesTotal = promauto.NewCounterVec(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have retries count as a Histogram metric? would be much better for analytics

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I will add retries count as histogram metric

prometheus.CounterOpts{
Name: "bifrost_retries_total",
Help: "Total number of retries performed by Bifrost.",
},
append(bifrostDefaultLabels, labels...),
)

isInitialized = true
}
Expand Down
21 changes: 21 additions & 0 deletions transports/bifrost-http/handlers/completions.go
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,10 @@ func (h *CompletionHandler) transcriptionCompletion(ctx *fasthttp.RequestCtx) {
// Make transcription request
resp, bifrostErr := h.client.TranscriptionRequest(*bifrostCtx, bifrostReq)

if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

// Handle response
if bifrostErr != nil {
SendBifrostError(ctx, bifrostErr, h.logger)
Expand Down Expand Up @@ -574,6 +578,10 @@ func (h *CompletionHandler) handleRequest(ctx *fasthttp.RequestCtx, completionTy
resp, bifrostErr = h.client.SpeechRequest(*bifrostCtx, bifrostReq)
}

if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

// Handle response
if bifrostErr != nil {
SendBifrostError(ctx, bifrostErr, h.logger)
Expand All @@ -593,6 +601,11 @@ func (h *CompletionHandler) handleRequest(ctx *fasthttp.RequestCtx, completionTy
return
}

// Expose the retry count stored in the context via the x-bf-retries response header
if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

// Send successful response
SendJSON(ctx, resp, h.logger)
}
Expand Down Expand Up @@ -666,6 +679,10 @@ func (h *CompletionHandler) handleStreamingChatCompletion(ctx *fasthttp.RequestC
return response, true
}

if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

h.handleStreamingResponse(ctx, getStream, extractResponse)
}

Expand Down Expand Up @@ -698,5 +715,9 @@ func (h *CompletionHandler) handleStreamingTranscriptionRequest(ctx *fasthttp.Re
return response.Transcribe, true
}

if r := getRetryCount(bifrostCtx); r >= 0 {
ctx.Response.Header.Set("x-bf-retries", strconv.Itoa(r))
}

h.handleStreamingResponse(ctx, getStream, extractResponse)
}
10 changes: 10 additions & 0 deletions transports/bifrost-http/handlers/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package handlers

import (
"context"
"encoding/json"
"fmt"
"slices"
Expand Down Expand Up @@ -110,3 +111,12 @@ func ParseModel(model string) (string, string, error) {
}
return provider, name, nil
}

// getRetryCount returns the retry count stored in the given context under
// schemas.BifrostContextKeyRetryCount.
//
// It returns -1 when bctx is nil, when bctx points to a nil context, or when
// no int value is stored under that key, so callers can distinguish "no count
// recorded" from a recorded count of zero.
func getRetryCount(bctx *context.Context) int {
	// A non-nil pointer can still hold a nil interface value; calling Value on
	// it would panic, so both levels are checked before the lookup.
	if bctx == nil || *bctx == nil {
		return -1
	}
	if count, ok := (*bctx).Value(schemas.BifrostContextKeyRetryCount).(int); ok {
		return count
	}
	return -1
}