// Copyright (c) 2026, Pim van Pelt package health import ( "net" "time" ) // CheckLayer indicates at which network layer a probe stopped. type CheckLayer int const ( LayerUnknown CheckLayer = iota LayerL4 // TCP connect LayerL6 // TLS handshake LayerL7 // Application (HTTP response, ICMP reply) ) // ProbeResult is the outcome of a single probe execution. type ProbeResult struct { OK bool Layer CheckLayer Code string // "L4OK", "L4TOUT", "L4CON", "L7OK", "L7TOUT", "L7RSP", "L7STS" Detail string // human-readable, e.g. "HTTP 503", "connection refused" } // State represents the health state of a backend. type State int const ( StateUnknown State = iota // initial state before first probe StateUp // backend is healthy StateDown // backend has failed enough probes StatePaused // operator paused health checking StateDisabled // operator disabled the backend StateRemoved // backend removed from configuration by reload ) func (s State) String() string { switch s { case StateUnknown: return "unknown" case StateUp: return "up" case StateDown: return "down" case StatePaused: return "paused" case StateDisabled: return "disabled" case StateRemoved: return "removed" default: return "unknown" } } // Transition records a single state change event. type Transition struct { From State To State At time.Time Result ProbeResult } // HealthCounter is HAProxy's single-integer rise/fall model. // // Health ∈ [0, Rise+Fall-1]. Server is UP when Health >= Rise, DOWN when // Health < Rise. On success Health increments (ceiling Rise+Fall-1); on // failure Health decrements (floor 0). This gives hysteresis: a flapping // backend stays in the degraded range without bouncing between UP and DOWN. type HealthCounter struct { Health int Rise int Fall int } func (h *HealthCounter) Max() int { return h.Rise + h.Fall - 1 } func (h *HealthCounter) IsUp() bool { return h.Health >= h.Rise } func (h *HealthCounter) IsDegraded() bool { return h.Health > 0 && h.Health < h.Max() } // RecordPass increments the counter. Returns true if the server just became UP. func (h *HealthCounter) RecordPass() bool { wasUp := h.IsUp() if h.Health < h.Max() { h.Health++ } return !wasUp && h.IsUp() } // RecordFail decrements the counter. Returns true if the server just went DOWN. func (h *HealthCounter) RecordFail() bool { wasDown := !h.IsUp() if h.Health > 0 { h.Health-- } return !wasDown && !h.IsUp() } // Backend tracks the health state of a named backend. type Backend struct { Name string Address net.IP State State Counter HealthCounter Transitions []Transition // newest first, capped at maxHistory } // New creates a Backend in StateUnknown with the health counter pre-loaded to // Rise-1, so the very first probe resolves the state: one pass → Up, any // fail → Down (via the StateUnknown shortcut in Record). func New(name string, addr net.IP, rise, fall int) *Backend { return &Backend{ Name: name, Address: addr, State: StateUnknown, Counter: HealthCounter{Rise: rise, Fall: fall, Health: rise - 1}, } } // Record applies a probe result to the health counter and transitions state if // needed. Returns true if the state changed. // // StateUnknown transitions to StateDown on the first failure (any evidence of // failure means the backend is not yet confirmed reachable), and to StateUp // once the counter reaches Rise consecutive passes. func (b *Backend) Record(r ProbeResult, maxHistory int) bool { if b.State == StatePaused || b.State == StateDisabled || b.State == StateRemoved { return false } if r.OK { if b.Counter.RecordPass() { b.transition(StateUp, r, maxHistory) return true } } else { if b.Counter.RecordFail() || b.State == StateUnknown { b.transition(StateDown, r, maxHistory) return true } } return false } // Pause transitions the backend to StatePaused. Returns true if the state changed. func (b *Backend) Pause(maxHistory int) bool { if b.State == StatePaused { return false } b.transition(StatePaused, ProbeResult{}, maxHistory) b.Counter.Health = 0 return true } // Resume transitions a paused backend back to StateUnknown, resetting the // counter. Returns true if the state changed. func (b *Backend) Resume(maxHistory int) bool { if b.State != StatePaused { return false } b.transition(StateUnknown, ProbeResult{}, maxHistory) b.Counter.Health = b.Counter.Rise - 1 return true } // NextInterval returns the appropriate probe interval based on state and counter: // - Unknown (initial / post-resume): fastInterval (falls back to interval) — probe quickly to establish state // - Fully healthy (counter at max): interval // - Fully down (counter at 0): downInterval (falls back to interval) // - Degraded (anywhere in between): fastInterval (falls back to interval) func (b *Backend) NextInterval(interval, fastInterval, downInterval time.Duration) time.Duration { if b.State == StateUnknown { if fastInterval > 0 { return fastInterval } return interval } if b.Counter.Health == b.Counter.Max() { return interval } if b.Counter.Health == 0 { if downInterval > 0 { return downInterval } return interval } if fastInterval > 0 { return fastInterval } return interval } // Start records the initial StateUnknown transition when a backend is first // created or restarted. It exists solely to populate the transition history // and fire a reload event; the state does not change. func (b *Backend) Start(maxHistory int) Transition { b.transition(StateUnknown, ProbeResult{Code: "start"}, maxHistory) return b.Transitions[0] } // Disable transitions the backend to StateDisabled. Returns the transition. // After this call no further probe results are accepted. func (b *Backend) Disable(maxHistory int) Transition { b.transition(StateDisabled, ProbeResult{Code: "disabled"}, maxHistory) return b.Transitions[0] } // Enable transitions a disabled backend back to StateUnknown, resetting the // counter so the first probe result resolves state (rise-1 preload gives // 1-pass → Up, 1-fail → Down). Returns the transition. func (b *Backend) Enable(maxHistory int) Transition { b.transition(StateUnknown, ProbeResult{Code: "enabled"}, maxHistory) b.Counter.Health = b.Counter.Rise - 1 return b.Transitions[0] } // Remove transitions the backend to StateRemoved. Returns the transition. // After this call no further probe results are accepted. func (b *Backend) Remove(maxHistory int) Transition { b.transition(StateRemoved, ProbeResult{Code: "removed"}, maxHistory) return b.Transitions[0] } // transition appends a new Transition and updates State. func (b *Backend) transition(to State, r ProbeResult, maxHistory int) { t := Transition{From: b.State, To: to, At: time.Now(), Result: r} b.Transitions = append([]Transition{t}, b.Transitions...) if len(b.Transitions) > maxHistory { b.Transitions = b.Transitions[:maxHistory] } b.State = to }