Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions slo/promql.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package slo

import (
"fmt"
"sort"
"time"

"github.com/prometheus/common/model"
Expand Down Expand Up @@ -633,14 +634,15 @@ func (o Objective) RequestRange(timerange time.Duration) string {

return expr.String()
case LatencyNative:
expr, err := parser.ParseExpr(`sum(histogram_count(rate(metric{}[1s])))`)
expr, err := parser.ParseExpr(`sum by (grouping) (histogram_count(rate(metric{}[1s])))`)
if err != nil {
return err.Error()
}

objectiveReplacer{
metric: o.Indicator.LatencyNative.Total.Name,
matchers: o.Indicator.LatencyNative.Total.LabelMatchers,
grouping: o.Indicator.LatencyNative.Grouping,
window: timerange,
}.replace(expr)

Expand Down Expand Up @@ -716,14 +718,26 @@ func (o Objective) ErrorsRange(timerange time.Duration) string {

return expr.String()
case LatencyNative:
expr, err := parser.ParseExpr(`1 - histogram_fraction(0,0.696969, sum(rate(metric{matchers="total"}[1s])))`)
expr, err := parser.ParseExpr(`1 - histogram_fraction(0,0.696969, sum by (grouping) (rate(metric{matchers="total"}[1s])))`)
if err != nil {
return err.Error()
}

groupingMap := map[string]struct{}{}
for _, s := range o.Indicator.LatencyNative.Grouping {
groupingMap[s] = struct{}{}
}

grouping := make([]string, 0, len(groupingMap))
for s := range groupingMap {
grouping = append(grouping, s)
}
sort.Strings(grouping)

objectiveReplacer{
metric: o.Indicator.LatencyNative.Total.Name,
matchers: o.Indicator.LatencyNative.Total.LabelMatchers,
grouping: grouping,
window: timerange,
target: time.Duration(o.Indicator.LatencyNative.Latency).Seconds(),
}.replace(expr)
Expand Down Expand Up @@ -772,7 +786,7 @@ func (o Objective) DurationRange(timerange time.Duration, percentile float64) st

return expr.String()
case LatencyNative:
expr, err := parser.ParseExpr(`histogram_quantile(0.420, sum(rate(metric{matchers="total"}[1s])))`)
expr, err := parser.ParseExpr(`histogram_quantile(0.420, sum by (grouping) (rate(metric{matchers="total"}[1s])))`)
if err != nil {
return err.Error()
}
Expand Down
34 changes: 34 additions & 0 deletions slo/promql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package slo

import (
"strconv"
"strings"
"testing"
"time"

Expand Down Expand Up @@ -146,6 +147,11 @@ var (
}
return o
}
// objectiveHTTPNativeLatencyGrouping returns the native-histogram latency
// objective with an explicit grouping by job and handler applied.
objectiveHTTPNativeLatencyGrouping = func() Objective {
	obj := objectiveHTTPNativeLatency()
	obj.Indicator.LatencyNative.Grouping = []string{"job", "handler"}
	return obj
}
objectiveHTTPLatencyGrouping = func() Objective {
o := objectiveHTTPLatency()
o.Indicator.Latency.Grouping = []string{"job", "handler"}
Expand Down Expand Up @@ -641,6 +647,13 @@ func TestObjective_QueryBurnrate(t *testing.T) {
name: "http-latency-native",
objective: objectiveHTTPNativeLatency(),
expected: `http_request_duration_seconds:burnrate5m{code=~"2..",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"}`,
}, {
name: "http-latency-native-grouping",
objective: objectiveHTTPNativeLatencyGrouping(),
grouping: []*labels.Matcher{
{Type: labels.MatchEqual, Name: "handler", Value: "/api/v1/query"},
},
expected: `http_request_duration_seconds:burnrate5m{code=~"2..",handler="/api/v1/query",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"}`,
}, {
name: "http-latency-grouping",
objective: objectiveHTTPLatencyGrouping(),
Expand Down Expand Up @@ -1086,3 +1099,24 @@ func TestReplacer(t *testing.T) {
})
}
}

// TestLatencyNativeBurnrateGrouping verifies that the grouping labels of a
// LatencyNative objective are propagated into the generated PromQL queries.
func TestLatencyNativeBurnrateGrouping(t *testing.T) {
	objective := objectiveHTTPNativeLatencyGrouping()

	// The label order inside "sum by (...)" is not guaranteed, so accept either order.
	containsGrouping := func(query string) bool {
		return strings.Contains(query, "sum by (handler, job)") ||
			strings.Contains(query, "sum by (job, handler)")
	}

	burnrateQuery := objective.Burnrate(5 * time.Minute)
	require.True(t, containsGrouping(burnrateQuery),
		"LatencyNative burnrate query should include grouping, got: %s", burnrateQuery)
	require.Contains(t, burnrateQuery, "histogram_fraction", "LatencyNative burnrate should use histogram_fraction")

	requestRangeQuery := objective.RequestRange(2 * time.Hour)
	require.True(t, containsGrouping(requestRangeQuery),
		"LatencyNative RequestRange should include grouping, got: %s", requestRangeQuery)

	errorsRangeQuery := objective.ErrorsRange(2 * time.Hour)
	require.True(t, containsGrouping(errorsRangeQuery),
		"LatencyNative ErrorsRange should include grouping, got: %s", errorsRangeQuery)
}
6 changes: 3 additions & 3 deletions slo/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ func (o Objective) Burnrate(timerange time.Duration) string {

return expr.String()
case LatencyNative:
expr, err := parser.ParseExpr(`1 - histogram_fraction(0,0.696969, sum(rate(metric{matchers="total"}[1s])))`)
expr, err := parser.ParseExpr(`1 - histogram_fraction(0,0.696969, sum by (grouping) (rate(metric{matchers="total"}[1s])))`)
if err != nil {
return err.Error()
}
Expand Down Expand Up @@ -903,7 +903,7 @@ func (o Objective) IncreaseRules() (monitoringv1.RuleGroup, error) {
}
}

expr, err := parser.ParseExpr(`histogram_count(sum(increase(metric{matchers="total"}[1s])))`)
expr, err := parser.ParseExpr(`histogram_count(sum by (grouping) (increase(metric{matchers="total"}[1s])))`)
if err != nil {
return monitoringv1.RuleGroup{}, err
}
Expand All @@ -921,7 +921,7 @@ func (o Objective) IncreaseRules() (monitoringv1.RuleGroup, error) {
Labels: ruleLabels,
})

expr, err = parser.ParseExpr(`histogram_fraction(0, 0.696969, sum(increase(metric{matchers="total"}[1s]))) * histogram_count(sum(increase(metric{matchers="total"}[1s])))`)
expr, err = parser.ParseExpr(`histogram_fraction(0, 0.696969, sum by (grouping) (increase(metric{matchers="total"}[1s]))) * histogram_count(sum by (grouping) (increase(metric{matchers="total"}[1s])))`)
if err != nil {
return monitoringv1.RuleGroup{}, err
}
Expand Down