Initial revision of healthchecker, inspired by HAProxy
This commit is contained in:
307
internal/health/state_test.go
Normal file
307
internal/health/state_test.go
Normal file
@@ -0,0 +1,307 @@
|
||||
package health
|
||||
|
||||
import (
|
||||
"net"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func newBackend() *Backend {
|
||||
return New("web4", net.ParseIP("10.0.0.1"), 2, 3) // rise=2, fall=3
|
||||
}
|
||||
|
||||
// pass returns a successful probe result tagged with LayerL7 and code "L7OK".
func pass() ProbeResult {
	return ProbeResult{
		OK:    true,
		Layer: LayerL7,
		Code:  "L7OK",
	}
}
|
||||
// fail returns a failed probe result tagged with LayerL4 and code "L4CON".
func fail() ProbeResult {
	return ProbeResult{
		OK:    false,
		Layer: LayerL4,
		Code:  "L4CON",
	}
}
|
||||
|
||||
func TestInitialState(t *testing.T) {
|
||||
b := newBackend()
|
||||
if b.State != StateUnknown {
|
||||
t.Errorf("initial state: got %s, want unknown", b.State)
|
||||
}
|
||||
if len(b.Transitions) != 0 {
|
||||
t.Errorf("initial transitions: got %d, want 0", len(b.Transitions))
|
||||
}
|
||||
if b.Counter.Health != 0 {
|
||||
t.Errorf("initial counter health: got %d, want 0", b.Counter.Health)
|
||||
}
|
||||
}
|
||||
|
||||
// TestRiseToUp: rise=2 passes from Down/Unknown → Up.
|
||||
func TestRiseToUp(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
initialState State
|
||||
}{
|
||||
{"from unknown", StateUnknown},
|
||||
{"from down", StateDown},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
b := newBackend()
|
||||
b.State = tt.initialState
|
||||
// First pass: counter=1, still in DOWN range (rise=2), no transition.
|
||||
if b.Record(pass(), 5) {
|
||||
t.Error("should not transition after 1 pass (rise=2)")
|
||||
}
|
||||
if b.State != tt.initialState {
|
||||
t.Errorf("state changed early: got %s", b.State)
|
||||
}
|
||||
// Second pass: counter=2=rise, transitions to Up.
|
||||
if !b.Record(pass(), 5) {
|
||||
t.Error("should transition to Up after 2 passes")
|
||||
}
|
||||
if b.State != StateUp {
|
||||
t.Errorf("state: got %s, want up", b.State)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestFallToDown: fall=3 failures from fully-healthy → Down.
|
||||
//
|
||||
// The fall guarantee applies from counter=max (fully healthy). A backend that
|
||||
// just became Up is at counter=rise (the floor of the UP range), so a single
|
||||
// failure already drops it back into the DOWN range. This is correct: hysteresis
|
||||
// protects a server that has been consistently healthy for a long time, not one
|
||||
// that just scraped past the rise threshold.
|
||||
func TestFallToDown(t *testing.T) {
|
||||
b := newBackend() // rise=2, fall=3, max=4
|
||||
|
||||
// Drive to fully healthy: need rise + (max-rise) = 4 passes total.
|
||||
for i := 0; i < b.Counter.Max(); i++ {
|
||||
b.Record(pass(), 5)
|
||||
}
|
||||
if b.State != StateUp {
|
||||
t.Fatalf("precondition: want up, got %s", b.State)
|
||||
}
|
||||
if b.Counter.Health != b.Counter.Max() {
|
||||
t.Fatalf("precondition: want counter=%d, got %d", b.Counter.Max(), b.Counter.Health)
|
||||
}
|
||||
|
||||
// fall-1=2 failures: counter 4→3→2, both still in UP range (>=rise=2).
|
||||
if b.Record(fail(), 5) {
|
||||
t.Error("should not transition after 1 fail from fully healthy")
|
||||
}
|
||||
if b.Record(fail(), 5) {
|
||||
t.Error("should not transition after 2 fails from fully healthy")
|
||||
}
|
||||
if b.State != StateUp {
|
||||
t.Errorf("state after 2 fails: got %s, want up", b.State)
|
||||
}
|
||||
|
||||
// Third failure: counter 2→1 < rise=2 → Down.
|
||||
if !b.Record(fail(), 5) {
|
||||
t.Error("should transition to Down after fall=3 failures from fully healthy")
|
||||
}
|
||||
if b.State != StateDown {
|
||||
t.Errorf("state: got %s, want down", b.State)
|
||||
}
|
||||
}
|
||||
|
||||
// TestUnknownToDownOnFirstFail: any failure while Unknown → Down immediately.
|
||||
func TestUnknownToDownOnFirstFail(t *testing.T) {
|
||||
b := newBackend()
|
||||
if !b.Record(fail(), 5) {
|
||||
t.Error("first fail from Unknown should transition to Down")
|
||||
}
|
||||
if b.State != StateDown {
|
||||
t.Errorf("state: got %s, want down", b.State)
|
||||
}
|
||||
}
|
||||
|
||||
// TestHysteresis: alternating pass/fail keeps backend in degraded range without
|
||||
// bouncing between Up and Down. This is the key HAProxy counter property.
|
||||
func TestHysteresis(t *testing.T) {
|
||||
b := newBackend()
|
||||
// Drive to Up.
|
||||
b.Record(pass(), 5)
|
||||
b.Record(pass(), 5) // counter=2, state=Up
|
||||
|
||||
// Alternate pass/fail. Counter oscillates 3↔2 (both in UP range for rise=2),
|
||||
// or 2↔1 (crossing the boundary). Let's trace:
|
||||
// Start: counter=2 (just became Up, was at 2=rise after second pass, then RecordPass incremented to 3... wait)
|
||||
// Actually: after first pass from Unknown (counter=0), counter=1. After second pass, counter=2=rise, RecordPass returns true → Up.
|
||||
// But RecordPass increments BEFORE checking: wasUp=false, counter becomes 2, IsUp()=true → returns true.
|
||||
// So after second pass: counter=2, state=Up.
|
||||
// max = rise+fall-1 = 2+3-1 = 4.
|
||||
|
||||
// fail: counter=1 < rise=2 → RecordFail: wasDown=false (counter was 2=rise, IsUp=true),
|
||||
// counter becomes 1, IsUp()=false → returns true → Down!
|
||||
// Hmm, so one fail from counter=2 (barely Up) → Down? That's with rise=2.
|
||||
|
||||
// The hysteresis is more visible with rise=2, fall=5: max=6.
|
||||
// Let's use a backend with more headroom.
|
||||
b2 := New("test", net.ParseIP("10.0.0.2"), 2, 5) // rise=2, fall=5, max=6
|
||||
// Drive to fully healthy.
|
||||
b2.Record(pass(), 5) // counter=1
|
||||
b2.Record(pass(), 5) // counter=2=rise → Up
|
||||
b2.Record(pass(), 5) // counter=3
|
||||
b2.Record(pass(), 5) // counter=4
|
||||
b2.Record(pass(), 5) // counter=5
|
||||
b2.Record(pass(), 5) // counter=6=max
|
||||
|
||||
// Now alternate: fail drops from 6, pass brings back up.
|
||||
// Should not transition since counter stays in UP range (>=2).
|
||||
for i := 0; i < 4; i++ {
|
||||
transitioned := b2.Record(fail(), 5) // 6→5→4→3→2 (all >=rise=2)
|
||||
if transitioned {
|
||||
t.Errorf("fail %d: should not transition (counter in UP range)", i+1)
|
||||
}
|
||||
if !b2.Counter.IsUp() {
|
||||
t.Errorf("fail %d: should still be up", i+1)
|
||||
}
|
||||
if b2.Record(pass(), 5) { // re-increment
|
||||
t.Errorf("pass %d: should not transition (already Up)", i+1)
|
||||
}
|
||||
}
|
||||
if b2.State != StateUp {
|
||||
t.Errorf("after alternating: want up, got %s", b2.State)
|
||||
}
|
||||
}
|
||||
|
||||
// TestNextInterval: correct interval selection based on counter state.
|
||||
func TestNextInterval(t *testing.T) {
|
||||
interval := 2 * time.Second
|
||||
fast := 500 * time.Millisecond
|
||||
down := 30 * time.Second
|
||||
|
||||
b := New("test", net.ParseIP("10.0.0.1"), 2, 3) // max=4
|
||||
|
||||
// Unknown (no probes yet): always use interval, never downInterval.
|
||||
if got := b.NextInterval(interval, fast, down); got != interval {
|
||||
t.Errorf("StateUnknown: got %v, want %v (interval)", got, interval)
|
||||
}
|
||||
|
||||
// After first fail: counter=0, state=Down → downInterval.
|
||||
b.Record(ProbeResult{OK: false, Code: "L4CON"}, 5)
|
||||
if b.State != StateDown {
|
||||
t.Fatalf("expected StateDown after first fail, got %s", b.State)
|
||||
}
|
||||
if got := b.NextInterval(interval, fast, down); got != down {
|
||||
t.Errorf("StateDown/counter=0: got %v, want %v (down)", got, down)
|
||||
}
|
||||
|
||||
// Drive to max (fully healthy) → interval.
|
||||
b.Counter.Health = b.Counter.Max()
|
||||
if got := b.NextInterval(interval, fast, down); got != interval {
|
||||
t.Errorf("counter=max: got %v, want %v (interval)", got, interval)
|
||||
}
|
||||
|
||||
// Degraded (0 < counter < max) → fastInterval.
|
||||
b.Counter.Health = 1
|
||||
if got := b.NextInterval(interval, fast, down); got != fast {
|
||||
t.Errorf("counter=1 (degraded): got %v, want %v (fast)", got, fast)
|
||||
}
|
||||
|
||||
// No fastInterval configured → falls back to interval.
|
||||
if got := b.NextInterval(interval, 0, down); got != interval {
|
||||
t.Errorf("degraded, no fast: got %v, want %v (interval)", got, interval)
|
||||
}
|
||||
|
||||
// No downInterval configured → falls back to interval.
|
||||
b.Counter.Health = 0
|
||||
if got := b.NextInterval(interval, fast, 0); got != interval {
|
||||
t.Errorf("down, no downInterval: got %v, want %v (interval)", got, interval)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPauseResume(t *testing.T) {
|
||||
b := newBackend()
|
||||
b.State = StateUp
|
||||
|
||||
changed := b.Pause(5)
|
||||
if !changed {
|
||||
t.Error("Pause should return true")
|
||||
}
|
||||
if b.State != StatePaused {
|
||||
t.Errorf("after Pause: got %s, want paused", b.State)
|
||||
}
|
||||
|
||||
// Probes ignored while paused.
|
||||
if b.Record(pass(), 5) {
|
||||
t.Error("Record(pass) should not transition while paused")
|
||||
}
|
||||
if b.Record(fail(), 5) {
|
||||
t.Error("Record(fail) should not transition while paused")
|
||||
}
|
||||
if b.State != StatePaused {
|
||||
t.Errorf("state changed while paused: %s", b.State)
|
||||
}
|
||||
|
||||
// Second Pause is a no-op.
|
||||
if b.Pause(5) {
|
||||
t.Error("second Pause should return false")
|
||||
}
|
||||
|
||||
changed = b.Resume(5)
|
||||
if !changed {
|
||||
t.Error("Resume should return true")
|
||||
}
|
||||
if b.State != StateUnknown {
|
||||
t.Errorf("after Resume: got %s, want unknown", b.State)
|
||||
}
|
||||
|
||||
// Resume on non-paused is a no-op.
|
||||
if b.Resume(5) {
|
||||
t.Error("Resume on non-paused should return false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTransitionHistory(t *testing.T) {
|
||||
b := newBackend()
|
||||
maxHistory := 3
|
||||
|
||||
// Drive several state changes. Each cycle: pass×2→Up, fail→Down (Unknown→Down on first fail).
|
||||
b.Record(fail(), maxHistory) // Unknown→Down
|
||||
b.Record(pass(), maxHistory) // counter++
|
||||
b.Record(pass(), maxHistory) // Down→Up
|
||||
b.Record(fail(), maxHistory) // Up: counter drops
|
||||
b.Record(fail(), maxHistory) // Up: counter drops
|
||||
b.Record(fail(), maxHistory) // Up→Down
|
||||
b.Record(pass(), maxHistory) // counter++
|
||||
b.Record(pass(), maxHistory) // Down→Up
|
||||
|
||||
if len(b.Transitions) != maxHistory {
|
||||
t.Errorf("transitions capped at %d, got %d", maxHistory, len(b.Transitions))
|
||||
}
|
||||
// Newest first: last transition was →Up.
|
||||
if b.Transitions[0].To != StateUp {
|
||||
t.Errorf("newest transition: got %s, want up", b.Transitions[0].To)
|
||||
}
|
||||
// Transitions carry ProbeResult.
|
||||
if b.Transitions[0].Result.Code == "" {
|
||||
t.Error("transition result code should not be empty")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTransitionTimestamp(t *testing.T) {
|
||||
b := newBackend()
|
||||
before := time.Now()
|
||||
b.Record(fail(), 5)
|
||||
after := time.Now()
|
||||
|
||||
if len(b.Transitions) == 0 {
|
||||
t.Fatal("expected a transition")
|
||||
}
|
||||
ts := b.Transitions[0].At
|
||||
if ts.Before(before) || ts.After(after) {
|
||||
t.Errorf("transition timestamp %v outside [%v, %v]", ts, before, after)
|
||||
}
|
||||
}
|
||||
|
||||
func TestStateString(t *testing.T) {
|
||||
cases := []struct {
|
||||
s State
|
||||
want string
|
||||
}{
|
||||
{StateUnknown, "unknown"},
|
||||
{StateUp, "up"},
|
||||
{StateDown, "down"},
|
||||
{StatePaused, "paused"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if c.s.String() != c.want {
|
||||
t.Errorf("State(%d).String() = %q, want %q", c.s, c.s.String(), c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user