From cf69cd7b2755b9893ff774e9d3fc2e323fa1f7c4 Mon Sep 17 00:00:00 2001 From: Francis Lavoie Date: Sat, 15 Apr 2023 10:51:36 -0400 Subject: [PATCH 1/5] Add `success_duration` --- modules/caddyhttp/reverseproxy/admin.go | 2 + modules/caddyhttp/reverseproxy/caddyfile.go | 17 ++++++ .../caddyhttp/reverseproxy/healthchecks.go | 57 ++++++++++++++++++- modules/caddyhttp/reverseproxy/hosts.go | 16 ++++++ .../caddyhttp/reverseproxy/reverseproxy.go | 3 + 5 files changed, 93 insertions(+), 2 deletions(-) diff --git a/modules/caddyhttp/reverseproxy/admin.go b/modules/caddyhttp/reverseproxy/admin.go index f64d1ecf0aa..5bdd410e36a 100644 --- a/modules/caddyhttp/reverseproxy/admin.go +++ b/modules/caddyhttp/reverseproxy/admin.go @@ -37,6 +37,7 @@ type upstreamStatus struct { Address string `json:"address"` NumRequests int `json:"num_requests"` Fails int `json:"fails"` + Successes int `json:"successes"` } // CaddyModule returns the Caddy module information. @@ -99,6 +100,7 @@ func (adminUpstreams) handleUpstreams(w http.ResponseWriter, r *http.Request) er Address: address, NumRequests: upstream.NumRequests(), Fails: upstream.Fails(), + Successes: upstream.Successes(), }) return true }) diff --git a/modules/caddyhttp/reverseproxy/caddyfile.go b/modules/caddyhttp/reverseproxy/caddyfile.go index fc8eed60950..4f27ca1ef8a 100644 --- a/modules/caddyhttp/reverseproxy/caddyfile.go +++ b/modules/caddyhttp/reverseproxy/caddyfile.go @@ -77,6 +77,7 @@ func parseCaddyfile(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error) // # passive health checking // fail_duration // max_fails +// success_duration // unhealthy_status // unhealthy_latency // unhealthy_request_count @@ -422,6 +423,22 @@ func (h *Handler) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { } h.HealthChecks.Passive.MaxFails = maxFails + case "success_duration": + if !d.NextArg() { + return d.ArgErr() + } + if h.HealthChecks == nil { + h.HealthChecks = new(HealthChecks) + } + if h.HealthChecks.Passive == nil { + h.HealthChecks.Passive = new(PassiveHealthChecks) + } + dur, err := caddy.ParseDuration(d.Val()) + if err != nil { + return d.Errf("bad duration value '%s': %v", d.Val(), err) + } + h.HealthChecks.Passive.SuccessDuration = caddy.Duration(dur) + case "fail_duration": if !d.NextArg() { return d.ArgErr() diff --git a/modules/caddyhttp/reverseproxy/healthchecks.go b/modules/caddyhttp/reverseproxy/healthchecks.go index cfc7bdff8ed..1362afa89a8 100644 --- a/modules/caddyhttp/reverseproxy/healthchecks.go +++ b/modules/caddyhttp/reverseproxy/healthchecks.go @@ -110,8 +110,8 @@ type ActiveHealthChecks struct { // health checks (that is, health checks which occur during // the normal flow of request proxying). type PassiveHealthChecks struct { - // How long to remember a failed request to a backend. A duration > 0 - // enables passive health checking. Default is 0. + // How long to remember a failed request to a backend. + // A duration > 0 enables passive health checking. Default is 0. FailDuration caddy.Duration `json:"fail_duration,omitempty"` // The number of failed requests within the FailDuration window to @@ -119,6 +119,9 @@ type PassiveHealthChecks struct { // that FailDuration be > 0. MaxFails int `json:"max_fails,omitempty"` + // How long to remember a successful request to a backend. Default is 0. + SuccessDuration caddy.Duration `json:"success_duration,omitempty"` + // Limits the number of simultaneous requests to a backend by // marking the backend as "down" if it has this many concurrent // requests or more. @@ -362,6 +365,56 @@ func (h *Handler) doActiveHealthCheck(dialInfo DialInfo, hostAddr string, upstre return nil } +// countSuccess is used with passive health checks. It +// remembers 1 success for upstream for the configured +// duration. If passive health checks are disabled or +// success expiry is 0, this is a no-op. +func (h *Handler) countSuccess(upstream *Upstream) { + // only count successes if passive health checking is enabled + // and if successes are configured have a non-zero expiry + if h.HealthChecks == nil || h.HealthChecks.Passive == nil { + return + } + successDuration := time.Duration(h.HealthChecks.Passive.SuccessDuration) + if successDuration == 0 { + return + } + + // count success immediately + err := upstream.Host.countSuccess(1) + if err != nil { + h.HealthChecks.Passive.logger.Error("could not count success", + zap.String("host", upstream.Dial), + zap.Error(err)) + return + } + + // forget it later + go func(host *Host, successDuration time.Duration) { + defer func() { + if err := recover(); err != nil { + h.HealthChecks.Passive.logger.Error("passive health check success forgetter panicked", + zap.Any("error", err), + zap.ByteString("stack", debug.Stack())) + } + }() + timer := time.NewTimer(successDuration) + select { + case <-h.ctx.Done(): + if !timer.Stop() { + <-timer.C + } + case <-timer.C: + } + err := host.countSuccess(-1) + if err != nil { + h.HealthChecks.Passive.logger.Error("could not forget success", + zap.String("host", upstream.Dial), + zap.Error(err)) + } + }(upstream.Host, successDuration) +} + // countFailure is used with passive health checks. It // remembers 1 failure for upstream for the configured // duration. If passive health checks are disabled or diff --git a/modules/caddyhttp/reverseproxy/hosts.go b/modules/caddyhttp/reverseproxy/hosts.go index 298d4f3216c..6815ea6269c 100644 --- a/modules/caddyhttp/reverseproxy/hosts.go +++ b/modules/caddyhttp/reverseproxy/hosts.go @@ -136,6 +136,7 @@ func (u *Upstream) fillHost() { // Its fields are accessed atomically and Host values must not be copied. type Host struct { numRequests int64 // must be 64-bit aligned on 32-bit systems (see https://golang.org/pkg/sync/atomic/#pkg-note-BUG) + successes int64 fails int64 } @@ -144,6 +145,11 @@ func (h *Host) NumRequests() int { return int(atomic.LoadInt64(&h.numRequests)) } +// Successes returns the number of recent successes with the upstream. +func (h *Host) Successes() int { + return int(atomic.LoadInt64(&h.successes)) +} + // Fails returns the number of recent failures with the upstream. func (h *Host) Fails() int { return int(atomic.LoadInt64(&h.fails)) @@ -159,6 +165,16 @@ func (h *Host) countRequest(delta int) error { return nil } +// countSuccess mutates the recent successes count by +// delta. It returns an error if the adjustment fails. +func (h *Host) countSuccess(delta int) error { + result := atomic.AddInt64(&h.successes, int64(delta)) + if result < 0 { + return fmt.Errorf("count below 0: %d", result) + } + return nil +} + // countFail mutates the recent failures count by // delta. It returns an error if the adjustment fails. func (h *Host) countFail(delta int) error { diff --git a/modules/caddyhttp/reverseproxy/reverseproxy.go b/modules/caddyhttp/reverseproxy/reverseproxy.go index 367b8a27777..ad0bcfcf033 100644 --- a/modules/caddyhttp/reverseproxy/reverseproxy.go +++ b/modules/caddyhttp/reverseproxy/reverseproxy.go @@ -562,6 +562,7 @@ func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w h repl.Set("http.reverse_proxy.upstream.port", dialInfo.Port) repl.Set("http.reverse_proxy.upstream.requests", upstream.Host.NumRequests()) repl.Set("http.reverse_proxy.upstream.max_requests", upstream.MaxRequests) + repl.Set("http.reverse_proxy.upstream.successes", upstream.Host.Successes()) repl.Set("http.reverse_proxy.upstream.fails", upstream.Host.Fails()) // mutate request headers according to this upstream; @@ -580,6 +581,7 @@ func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w h if proxyErr == nil || errors.Is(proxyErr, context.Canceled) { // context.Canceled happens when the downstream client // cancels the request, which is not our failure + h.countSuccess(upstream) return true, nil } @@ -588,6 +590,7 @@ func (h *Handler) proxyLoopIteration(r *http.Request, origReq *http.Request, w h // occur after the roundtrip if, for example, a response handler // after the roundtrip returns an error) if succ, ok := proxyErr.(roundtripSucceeded); ok { + h.countSuccess(upstream) return true, succ.error } From c4b934f232d32698d6fc7f78e45d609c64b7cfac Mon Sep 17 00:00:00 2001 From: Francis Lavoie Date: Sat, 15 Apr 2023 11:33:59 -0400 Subject: [PATCH 2/5] Add `caddyhttp.Ratio` type --- modules/caddyhttp/caddyhttp.go | 79 ++++++++++++++++++++++++++++- modules/caddyhttp/caddyhttp_test.go | 76 +++++++++++++++++++++++++++ 2 files changed, 154 insertions(+), 1 deletion(-) diff --git a/modules/caddyhttp/caddyhttp.go b/modules/caddyhttp/caddyhttp.go index c497dc7a1d2..bcb62198525 100644 --- a/modules/caddyhttp/caddyhttp.go +++ b/modules/caddyhttp/caddyhttp.go @@ -17,6 +17,7 @@ package caddyhttp import ( "bytes" "encoding/json" + "fmt" "io" "net" "net/http" @@ -164,7 +165,7 @@ func (ws *WeakString) UnmarshalJSON(b []byte) error { return nil } -// MarshalJSON marshals was a boolean if true or false, +// MarshalJSON marshals as a boolean if true or false, // a number if an integer, or a string otherwise. func (ws WeakString) MarshalJSON() ([]byte, error) { if ws == "true" { @@ -204,6 +205,82 @@ func (ws WeakString) String() string { return string(ws) } +// Ratio is a type that unmarshals a valid numerical ratio string. +// Valid formats are: +// - a/b as a fraction (a / b) +// - a:b as a ratio (a / a+b) +// - a floating point number +type Ratio float64 + +// UnmarshalJSON satisfies json.Unmarshaler according to +// this type's documentation. +func (r *Ratio) UnmarshalJSON(b []byte) error { + if len(b) == 0 { + return io.EOF + } + if b[0] == byte('"') && b[len(b)-1] == byte('"') { + if !strings.Contains(string(b), "/") && !strings.Contains(string(b), ":") { + return fmt.Errorf("ratio string '%s' did not contain a slash '/' or colon ':'", string(b[1:len(b)-1])) + } + if strings.Contains(string(b), "/") { + left, right, _ := strings.Cut(string(b[1:len(b)-1]), "/") + num, err := strconv.Atoi(left) + if err != nil { + return fmt.Errorf("failed parsing numerator as integer %s: %v", left, err) + } + denom, err := strconv.Atoi(right) + if err != nil { + return fmt.Errorf("failed parsing denominator as integer %s: %v", right, err) + } + *r = Ratio(float64(num) / float64(denom)) + return nil + } + if strings.Contains(string(b), ":") { + left, right, _ := strings.Cut(string(b[1:len(b)-1]), ":") + num, err := strconv.Atoi(left) + if err != nil { + return fmt.Errorf("failed parsing numerator as integer %s: %v", left, err) + } + denom, err := strconv.Atoi(right) + if err != nil { + return fmt.Errorf("failed parsing denominator as integer %s: %v", right, err) + } + *r = Ratio(float64(num) / (float64(num) + float64(denom))) + return nil + } + return fmt.Errorf("invalid ratio string '%s'", string(b[1:len(b)-1])) + } + if bytes.Equal(b, []byte("null")) { + return nil + } + float, err := strconv.ParseFloat(string(b), 64) + if err != nil { + return fmt.Errorf("failed parsing ratio as float %s: %v", b, err) + } + *r = Ratio(float) + return nil +} + +func ParseRatio(r string) (Ratio, error) { + if strings.Contains(r, "/") { + left, right, _ := strings.Cut(r, "/") + num, err := strconv.Atoi(left) + if err != nil { + return 0, fmt.Errorf("failed parsing numerator as integer %s: %v", left, err) + } + denom, err := strconv.Atoi(right) + if err != nil { + return 0, fmt.Errorf("failed parsing denominator as integer %s: %v", right, err) + } + return Ratio(float64(num) / float64(denom)), nil + } + float, err := strconv.ParseFloat(r, 64) + if err != nil { + return 0, fmt.Errorf("failed parsing ratio as float %s: %v", r, err) + } + return Ratio(float), nil +} + // StatusCodeMatches returns true if a real HTTP status code matches // the configured status code, which may be either a real HTTP status // code or an integer representing a class of codes (e.g. 4 for all diff --git a/modules/caddyhttp/caddyhttp_test.go b/modules/caddyhttp/caddyhttp_test.go index a14de781429..2091ca5973a 100644 --- a/modules/caddyhttp/caddyhttp_test.go +++ b/modules/caddyhttp/caddyhttp_test.go @@ -149,3 +149,79 @@ func TestCleanPath(t *testing.T) { } } } + +func TestUnmarshalRatio(t *testing.T) { + for i, tc := range []struct { + input []byte + expect float64 + errMsg string + }{ + { + input: []byte("null"), + expect: 0, + }, + { + input: []byte(`"1/3"`), + expect: float64(1) / float64(3), + }, + { + input: []byte(`"1/100"`), + expect: float64(1) / float64(100), + }, + { + input: []byte(`"3:2"`), + expect: 0.6, + }, + { + input: []byte(`"99:1"`), + expect: 0.99, + }, + { + input: []byte(`"1/100"`), + expect: float64(1) / float64(100), + }, + { + input: []byte(`0.1`), + expect: 0.1, + }, + { + input: []byte(`0.005`), + expect: 0.005, + }, + { + input: []byte(`0`), + expect: 0, + }, + { + input: []byte(`"0"`), + errMsg: `ratio string '0' did not contain a slash '/' or colon ':'`, + }, + { + input: []byte(`a`), + errMsg: `failed parsing ratio as float a: strconv.ParseFloat: parsing "a": invalid syntax`, + }, + { + input: []byte(`"a/1"`), + errMsg: `failed parsing numerator as integer a: strconv.Atoi: parsing "a": invalid syntax`, + }, + { + input: []byte(`"1/a"`), + errMsg: `failed parsing denominator as integer a: strconv.Atoi: parsing "a": invalid syntax`, + }, + } { + ratio := Ratio(0) + err := ratio.UnmarshalJSON(tc.input) + if err != nil { + if tc.errMsg != "" { + if tc.errMsg != err.Error() { + t.Fatalf("Test %d: expected error: %v, got: %v", i, tc.errMsg, err) + } + continue + } + t.Fatalf("Test %d: invalid ratio: %v", i, err) + } + if ratio != Ratio(tc.expect) { + t.Fatalf("Test %d: expected %v, got %v", i, tc.expect, ratio) + } + } +} From c8b8c3a7b2ee630a4a8ac4c42afdf41169ba36b4 Mon Sep 17 00:00:00 2001 From: Francis Lavoie Date: Sun, 26 Feb 2023 02:16:05 -0500 Subject: [PATCH 3/5] Add `min_success_ratio` WIP --- modules/caddyhttp/reverseproxy/caddyfile.go | 17 +++++++++++++++++ modules/caddyhttp/reverseproxy/healthchecks.go | 5 +++++ 2 files changed, 22 insertions(+) diff --git a/modules/caddyhttp/reverseproxy/caddyfile.go b/modules/caddyhttp/reverseproxy/caddyfile.go index 4f27ca1ef8a..c12fb2ec0bd 100644 --- a/modules/caddyhttp/reverseproxy/caddyfile.go +++ b/modules/caddyhttp/reverseproxy/caddyfile.go @@ -78,6 +78,7 @@ func parseCaddyfile(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error) // fail_duration // max_fails // success_duration +// min_success_ratio // unhealthy_status // unhealthy_latency // unhealthy_request_count @@ -439,6 +440,22 @@ func (h *Handler) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { } h.HealthChecks.Passive.SuccessDuration = caddy.Duration(dur) + case "min_success_ratio": + if !d.NextArg() { + return d.ArgErr() + } + if h.HealthChecks == nil { + h.HealthChecks = new(HealthChecks) + } + if h.HealthChecks.Passive == nil { + h.HealthChecks.Passive = new(PassiveHealthChecks) + } + ratio, err := caddyhttp.ParseRatio(d.Val()) + if err != nil { + return d.Errf("bad ratio value '%s': %v", d.Val(), err) + } + h.HealthChecks.Passive.MinSuccessRatio = ratio + case "fail_duration": if !d.NextArg() { return d.ArgErr() diff --git a/modules/caddyhttp/reverseproxy/healthchecks.go b/modules/caddyhttp/reverseproxy/healthchecks.go index 1362afa89a8..20ba0b9bfe3 100644 --- a/modules/caddyhttp/reverseproxy/healthchecks.go +++ b/modules/caddyhttp/reverseproxy/healthchecks.go @@ -122,6 +122,11 @@ type PassiveHealthChecks struct { // How long to remember a successful request to a backend. Default is 0. SuccessDuration caddy.Duration `json:"success_duration,omitempty"` + // The minimum ratio of successful to failed requests necessary to + // consider a backend as healthy. Both fail and success durations + // must be configured for those stats to be counted. Default is 0 (no ratio). + MinSuccessRatio caddyhttp.Ratio `json:"min_success_ratio,omitempty"` + // Limits the number of simultaneous requests to a backend by // marking the backend as "down" if it has this many concurrent // requests or more. From 2c61b50b5f4d206ea714d7b6e99e10b51ca01aed Mon Sep 17 00:00:00 2001 From: Francis Lavoie Date: Sat, 15 Apr 2023 08:44:12 -0400 Subject: [PATCH 4/5] Add `min_successes` --- modules/caddyhttp/reverseproxy/caddyfile.go | 17 +++++++++++++++++ modules/caddyhttp/reverseproxy/healthchecks.go | 8 ++++++++ modules/caddyhttp/reverseproxy/reverseproxy.go | 4 ++++ 3 files changed, 29 insertions(+) diff --git a/modules/caddyhttp/reverseproxy/caddyfile.go b/modules/caddyhttp/reverseproxy/caddyfile.go index c12fb2ec0bd..119f433df29 100644 --- a/modules/caddyhttp/reverseproxy/caddyfile.go +++ b/modules/caddyhttp/reverseproxy/caddyfile.go @@ -79,6 +79,7 @@ func parseCaddyfile(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error) // max_fails // success_duration // min_success_ratio +// min_success // unhealthy_status // unhealthy_latency // unhealthy_request_count @@ -456,6 +457,22 @@ func (h *Handler) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { } h.HealthChecks.Passive.MinSuccessRatio = ratio + case "min_successes": + if !d.NextArg() { + return d.ArgErr() + } + if h.HealthChecks == nil { + h.HealthChecks = new(HealthChecks) + } + if h.HealthChecks.Passive == nil { + h.HealthChecks.Passive = new(PassiveHealthChecks) + } + count, err := strconv.Atoi(d.Val()) + if err != nil { + return d.Errf("invalid minimum success count '%s': %v", d.Val(), err) + } + h.HealthChecks.Passive.MinSuccesses = count + case "fail_duration": if !d.NextArg() { return d.ArgErr() diff --git a/modules/caddyhttp/reverseproxy/healthchecks.go b/modules/caddyhttp/reverseproxy/healthchecks.go index 20ba0b9bfe3..14782e71b92 100644 --- a/modules/caddyhttp/reverseproxy/healthchecks.go +++ b/modules/caddyhttp/reverseproxy/healthchecks.go @@ -127,6 +127,14 @@ type PassiveHealthChecks struct { // must be configured for those stats to be counted. Default is 0 (no ratio). MinSuccessRatio caddyhttp.Ratio `json:"min_success_ratio,omitempty"` + // The minimum number of successful requests before considering the + // minimum success ratio. Default is 5. Requires MinSuccessRatio >= 0. + // + // If there are less than this many successful requests, then the ratio is + // ignored, because of a lack of data. This ensures that the upstream isn't + // prematurely considered unhealthy because no requests have happened yet. + MinSuccesses int `json:"min_successes,omitempty"` + // Limits the number of simultaneous requests to a backend by // marking the backend as "down" if it has this many concurrent // requests or more. diff --git a/modules/caddyhttp/reverseproxy/reverseproxy.go b/modules/caddyhttp/reverseproxy/reverseproxy.go index ad0bcfcf033..6ba98da7756 100644 --- a/modules/caddyhttp/reverseproxy/reverseproxy.go +++ b/modules/caddyhttp/reverseproxy/reverseproxy.go @@ -352,6 +352,10 @@ func (h *Handler) Provision(ctx caddy.Context) error { if h.HealthChecks.Passive.FailDuration > 0 && h.HealthChecks.Passive.MaxFails == 0 { h.HealthChecks.Passive.MaxFails = 1 } + + if h.HealthChecks.Passive.MinSuccessRatio > 0 && h.HealthChecks.Passive.MinSuccesses == 0 { + h.HealthChecks.Passive.MinSuccesses = 5 + } } // if active health checks are enabled, configure them and start a worker From 6d010189a56b191538bdc534de7ede7e89f754c4 Mon Sep 17 00:00:00 2001 From: Francis Lavoie Date: Sat, 15 Apr 2023 09:06:41 -0400 Subject: [PATCH 5/5] Implement success ratio in health checks --- modules/caddyhttp/reverseproxy/hosts.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/modules/caddyhttp/reverseproxy/hosts.go b/modules/caddyhttp/reverseproxy/hosts.go index 6815ea6269c..732a71746a6 100644 --- a/modules/caddyhttp/reverseproxy/hosts.go +++ b/modules/caddyhttp/reverseproxy/hosts.go @@ -84,6 +84,15 @@ func (u *Upstream) Healthy() bool { if healthy && u.healthCheckPolicy != nil { healthy = u.Host.Fails() < u.healthCheckPolicy.MaxFails } + if healthy && u.healthCheckPolicy != nil && + u.healthCheckPolicy.MinSuccessRatio > 0 { + successes := u.Host.Successes() + if successes >= u.healthCheckPolicy.MinSuccesses { + fails := u.Host.Fails() + healthRatio := float64(fails) / float64(successes) + healthy = healthRatio < (1 - float64(u.healthCheckPolicy.MinSuccessRatio)) + } + } if healthy && u.cb != nil { healthy = u.cb.OK() }