Initial revision of healthchecker, inspired by HAProxy
This commit is contained in:
189
internal/health/state.go
Normal file
189
internal/health/state.go
Normal file
@@ -0,0 +1,189 @@
|
||||
package health
|
||||
|
||||
import (
|
||||
"net"
|
||||
"time"
|
||||
)
|
||||
|
||||
// CheckLayer indicates at which network layer a probe stopped.
//
// The zero value is LayerUnknown; the remaining layers are numbered in
// declaration order by iota.
type CheckLayer int

const (
	// LayerUnknown is the zero value of CheckLayer.
	LayerUnknown CheckLayer = iota

	// LayerL4 is the TCP connect stage.
	LayerL4

	// LayerL6 is the TLS handshake stage.
	LayerL6

	// LayerL7 is the application stage (HTTP response, ICMP reply).
	LayerL7
)
|
||||
|
||||
// ProbeResult is the outcome of a single probe execution.
|
||||
type ProbeResult struct {
|
||||
OK bool
|
||||
Layer CheckLayer
|
||||
Code string // "L4OK", "L4TOUT", "L4CON", "L7OK", "L7TOUT", "L7RSP", "L7STS"
|
||||
Detail string // human-readable, e.g. "HTTP 503", "connection refused"
|
||||
}
|
||||
|
||||
// State represents the health state of a backend.
//
// The zero value is StateUnknown.
type State int

const (
	// StateUnknown is the initial state before the first probe.
	StateUnknown State = iota

	// StateUp is the state of a backend that is considered up.
	StateUp

	// StateDown is the state of a backend that is considered down.
	StateDown

	// StatePaused is the state of a backend that has been paused.
	StatePaused
)

// String returns the lowercase name of the state: "unknown", "up", "down" or
// "paused". Values outside the declared constants also yield "unknown".
func (s State) String() string {
	switch s {
	case StateUp:
		return "up"
	case StateDown:
		return "down"
	case StatePaused:
		return "paused"
	default:
		// StateUnknown and any out-of-range value share the same label.
		return "unknown"
	}
}
|
||||
|
||||
// Transition records a single state change event.
|
||||
type Transition struct {
|
||||
From State
|
||||
To State
|
||||
At time.Time
|
||||
Result ProbeResult
|
||||
}
|
||||
|
||||
// HealthCounter is a single-integer rise/fall counter modelled on HAProxy's
// health checks.
//
// Health ∈ [0, Rise+Fall-1]. The server is UP when Health >= Rise and DOWN
// when Health < Rise. Each success increments Health (ceiling Rise+Fall-1);
// each failure decrements it (floor 0). Starting from the ceiling it therefore
// takes Fall failures to go DOWN, and starting from the floor it takes Rise
// successes to come UP.
//
// Note that exactly at the boundary (Health == Rise) a single failure takes
// the server DOWN and a single success brings it back UP.
type HealthCounter struct {
	// Health is the current counter value.
	Health int

	// Rise is the threshold at or above which the server is UP.
	Rise int

	// Fall, together with Rise, determines the ceiling (Rise+Fall-1).
	Fall int
}

// Max returns the ceiling of Health, Rise+Fall-1.
func (h *HealthCounter) Max() int { return h.Rise + h.Fall - 1 }

// IsUp reports whether Health has reached Rise.
func (h *HealthCounter) IsUp() bool { return h.Health >= h.Rise }

// IsDegraded reports whether Health lies strictly between 0 and Max.
func (h *HealthCounter) IsDegraded() bool { return h.Health > 0 && h.Health < h.Max() }

// RecordPass increments the counter unless it is already at its ceiling.
// Returns true if the server just became UP.
func (h *HealthCounter) RecordPass() bool {
	if h.Health >= h.Max() {
		// Already at the ceiling: nothing changes, so no UP edge either.
		return false
	}
	wasUp := h.IsUp()
	h.Health++
	return !wasUp && h.IsUp()
}

// RecordFail decrements the counter unless it is already at zero.
// Returns true if the server just went DOWN.
func (h *HealthCounter) RecordFail() bool {
	if h.Health <= 0 {
		// Already at the floor: nothing changes, so no DOWN edge either.
		return false
	}
	wasUp := h.IsUp()
	h.Health--
	return wasUp && !h.IsUp()
}
|
||||
|
||||
// Backend tracks the health state of one VIP:backend tuple.
|
||||
type Backend struct {
|
||||
VIPName string
|
||||
Address net.IP
|
||||
State State
|
||||
Counter HealthCounter
|
||||
Transitions []Transition // newest first, capped at maxHistory
|
||||
}
|
||||
|
||||
// New creates a Backend in StateUnknown.
|
||||
func New(vipName string, addr net.IP, rise, fall int) *Backend {
|
||||
return &Backend{
|
||||
VIPName: vipName,
|
||||
Address: addr,
|
||||
State: StateUnknown,
|
||||
Counter: HealthCounter{Rise: rise, Fall: fall},
|
||||
}
|
||||
}
|
||||
|
||||
// Record applies a probe result to the health counter and transitions state if
|
||||
// needed. Returns true if the state changed.
|
||||
//
|
||||
// StateUnknown transitions to StateDown on the first failure (any evidence of
|
||||
// failure means the backend is not yet confirmed reachable), and to StateUp
|
||||
// once the counter reaches Rise consecutive passes.
|
||||
func (b *Backend) Record(r ProbeResult, maxHistory int) bool {
|
||||
if b.State == StatePaused {
|
||||
return false
|
||||
}
|
||||
if r.OK {
|
||||
if b.Counter.RecordPass() {
|
||||
b.transition(StateUp, r, maxHistory)
|
||||
return true
|
||||
}
|
||||
} else {
|
||||
if b.Counter.RecordFail() || b.State == StateUnknown {
|
||||
b.transition(StateDown, r, maxHistory)
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Pause transitions the backend to StatePaused. Returns true if the state changed.
|
||||
func (b *Backend) Pause(maxHistory int) bool {
|
||||
if b.State == StatePaused {
|
||||
return false
|
||||
}
|
||||
b.transition(StatePaused, ProbeResult{}, maxHistory)
|
||||
b.Counter.Health = 0
|
||||
return true
|
||||
}
|
||||
|
||||
// Resume transitions a paused backend back to StateUnknown, resetting the
|
||||
// counter. Returns true if the state changed.
|
||||
func (b *Backend) Resume(maxHistory int) bool {
|
||||
if b.State != StatePaused {
|
||||
return false
|
||||
}
|
||||
b.transition(StateUnknown, ProbeResult{}, maxHistory)
|
||||
b.Counter.Health = 0
|
||||
return true
|
||||
}
|
||||
|
||||
// NextInterval returns the appropriate probe interval based on state and counter:
|
||||
// - Unknown (no probes yet): interval — probe promptly to establish initial state
|
||||
// - Fully healthy (counter at max): interval
|
||||
// - Fully down (counter at 0): downInterval (falls back to interval)
|
||||
// - Degraded (anywhere in between): fastInterval (falls back to interval)
|
||||
func (b *Backend) NextInterval(interval, fastInterval, downInterval time.Duration) time.Duration {
|
||||
if b.State == StateUnknown {
|
||||
return interval
|
||||
}
|
||||
if b.Counter.Health == b.Counter.Max() {
|
||||
return interval
|
||||
}
|
||||
if b.Counter.Health == 0 {
|
||||
if downInterval > 0 {
|
||||
return downInterval
|
||||
}
|
||||
return interval
|
||||
}
|
||||
if fastInterval > 0 {
|
||||
return fastInterval
|
||||
}
|
||||
return interval
|
||||
}
|
||||
|
||||
// transition appends a new Transition and updates State.
|
||||
func (b *Backend) transition(to State, r ProbeResult, maxHistory int) {
|
||||
t := Transition{From: b.State, To: to, At: time.Now(), Result: r}
|
||||
b.Transitions = append([]Transition{t}, b.Transitions...)
|
||||
if len(b.Transitions) > maxHistory {
|
||||
b.Transitions = b.Transitions[:maxHistory]
|
||||
}
|
||||
b.State = to
|
||||
}
|
||||
Reference in New Issue
Block a user