- when a backend gets newly added or restarted, an event should fire; perhaps transition to 'unknown'
- when a backend gets removed, an event should fire; perhaps transition to 'removed'
- when a backend is in 'unknown' state, fast-interval is appropriate
349 lines
11 KiB
Go
349 lines
11 KiB
Go
// Copyright (c) 2026, Pim van Pelt <pim@ipng.ch>
|
||
|
||
package health
|
||
|
||
import (
|
||
"net"
|
||
"testing"
|
||
"time"
|
||
)
|
||
|
||
func newBackend() *Backend {
|
||
return New("web4", net.ParseIP("10.0.0.1"), 2, 3) // rise=2, fall=3
|
||
}
|
||
|
||
// pass returns a successful probe result, reported at layer 7 with code "L7OK".
func pass() ProbeResult {
	return ProbeResult{
		OK:    true,
		Layer: LayerL7,
		Code:  "L7OK",
	}
}
|
||
// fail returns a failed probe result, reported at layer 4 with code "L4CON".
func fail() ProbeResult {
	return ProbeResult{
		OK:    false,
		Layer: LayerL4,
		Code:  "L4CON",
	}
}
|
||
|
||
func TestInitialState(t *testing.T) {
|
||
b := newBackend()
|
||
if b.State != StateUnknown {
|
||
t.Errorf("initial state: got %s, want unknown", b.State)
|
||
}
|
||
if len(b.Transitions) != 0 {
|
||
t.Errorf("initial transitions: got %d, want 0", len(b.Transitions))
|
||
}
|
||
if b.Counter.Health != 0 {
|
||
t.Errorf("initial counter health: got %d, want 0", b.Counter.Health)
|
||
}
|
||
}
|
||
|
||
// TestRiseToUp: rise=2 passes from Down/Unknown → Up.
|
||
func TestRiseToUp(t *testing.T) {
|
||
tests := []struct {
|
||
name string
|
||
initialState State
|
||
}{
|
||
{"from unknown", StateUnknown},
|
||
{"from down", StateDown},
|
||
}
|
||
for _, tt := range tests {
|
||
t.Run(tt.name, func(t *testing.T) {
|
||
b := newBackend()
|
||
b.State = tt.initialState
|
||
// First pass: counter=1, still in DOWN range (rise=2), no transition.
|
||
if b.Record(pass(), 5) {
|
||
t.Error("should not transition after 1 pass (rise=2)")
|
||
}
|
||
if b.State != tt.initialState {
|
||
t.Errorf("state changed early: got %s", b.State)
|
||
}
|
||
// Second pass: counter=2=rise, transitions to Up.
|
||
if !b.Record(pass(), 5) {
|
||
t.Error("should transition to Up after 2 passes")
|
||
}
|
||
if b.State != StateUp {
|
||
t.Errorf("state: got %s, want up", b.State)
|
||
}
|
||
})
|
||
}
|
||
}
|
||
|
||
// TestFallToDown: fall=3 failures from fully-healthy → Down.
|
||
//
|
||
// The fall guarantee applies from counter=max (fully healthy). A backend that
|
||
// just became Up is at counter=rise (the floor of the UP range), so a single
|
||
// failure already drops it back into the DOWN range. This is correct: hysteresis
|
||
// protects a server that has been consistently healthy for a long time, not one
|
||
// that just scraped past the rise threshold.
|
||
func TestFallToDown(t *testing.T) {
|
||
b := newBackend() // rise=2, fall=3, max=4
|
||
|
||
// Drive to fully healthy: need rise + (max-rise) = 4 passes total.
|
||
for i := 0; i < b.Counter.Max(); i++ {
|
||
b.Record(pass(), 5)
|
||
}
|
||
if b.State != StateUp {
|
||
t.Fatalf("precondition: want up, got %s", b.State)
|
||
}
|
||
if b.Counter.Health != b.Counter.Max() {
|
||
t.Fatalf("precondition: want counter=%d, got %d", b.Counter.Max(), b.Counter.Health)
|
||
}
|
||
|
||
// fall-1=2 failures: counter 4→3→2, both still in UP range (>=rise=2).
|
||
if b.Record(fail(), 5) {
|
||
t.Error("should not transition after 1 fail from fully healthy")
|
||
}
|
||
if b.Record(fail(), 5) {
|
||
t.Error("should not transition after 2 fails from fully healthy")
|
||
}
|
||
if b.State != StateUp {
|
||
t.Errorf("state after 2 fails: got %s, want up", b.State)
|
||
}
|
||
|
||
// Third failure: counter 2→1 < rise=2 → Down.
|
||
if !b.Record(fail(), 5) {
|
||
t.Error("should transition to Down after fall=3 failures from fully healthy")
|
||
}
|
||
if b.State != StateDown {
|
||
t.Errorf("state: got %s, want down", b.State)
|
||
}
|
||
}
|
||
|
||
// TestUnknownToDownOnFirstFail: any failure while Unknown → Down immediately.
|
||
func TestUnknownToDownOnFirstFail(t *testing.T) {
|
||
b := newBackend()
|
||
if !b.Record(fail(), 5) {
|
||
t.Error("first fail from Unknown should transition to Down")
|
||
}
|
||
if b.State != StateDown {
|
||
t.Errorf("state: got %s, want down", b.State)
|
||
}
|
||
}
|
||
|
||
// TestHysteresis: alternating pass/fail keeps backend in degraded range without
|
||
// bouncing between Up and Down. This is the key HAProxy counter property.
|
||
func TestHysteresis(t *testing.T) {
|
||
b := newBackend()
|
||
// Drive to Up.
|
||
b.Record(pass(), 5)
|
||
b.Record(pass(), 5) // counter=2, state=Up
|
||
|
||
// Alternate pass/fail. Counter oscillates 3↔2 (both in UP range for rise=2),
|
||
// or 2↔1 (crossing the boundary). Let's trace:
|
||
// Start: counter=2 (just became Up, was at 2=rise after second pass, then RecordPass incremented to 3... wait)
|
||
// Actually: after first pass from Unknown (counter=0), counter=1. After second pass, counter=2=rise, RecordPass returns true → Up.
|
||
// But RecordPass increments BEFORE checking: wasUp=false, counter becomes 2, IsUp()=true → returns true.
|
||
// So after second pass: counter=2, state=Up.
|
||
// max = rise+fall-1 = 2+3-1 = 4.
|
||
|
||
// fail: counter=1 < rise=2 → RecordFail: wasDown=false (counter was 2=rise, IsUp=true),
|
||
// counter becomes 1, IsUp()=false → returns true → Down!
|
||
// Hmm, so one fail from counter=2 (barely Up) → Down? That's with rise=2.
|
||
|
||
// The hysteresis is more visible with rise=2, fall=5: max=6.
|
||
// Let's use a backend with more headroom.
|
||
b2 := New("test", net.ParseIP("10.0.0.2"), 2, 5) // rise=2, fall=5, max=6
|
||
// Drive to fully healthy.
|
||
b2.Record(pass(), 5) // counter=1
|
||
b2.Record(pass(), 5) // counter=2=rise → Up
|
||
b2.Record(pass(), 5) // counter=3
|
||
b2.Record(pass(), 5) // counter=4
|
||
b2.Record(pass(), 5) // counter=5
|
||
b2.Record(pass(), 5) // counter=6=max
|
||
|
||
// Now alternate: fail drops from 6, pass brings back up.
|
||
// Should not transition since counter stays in UP range (>=2).
|
||
for i := 0; i < 4; i++ {
|
||
transitioned := b2.Record(fail(), 5) // 6→5→4→3→2 (all >=rise=2)
|
||
if transitioned {
|
||
t.Errorf("fail %d: should not transition (counter in UP range)", i+1)
|
||
}
|
||
if !b2.Counter.IsUp() {
|
||
t.Errorf("fail %d: should still be up", i+1)
|
||
}
|
||
if b2.Record(pass(), 5) { // re-increment
|
||
t.Errorf("pass %d: should not transition (already Up)", i+1)
|
||
}
|
||
}
|
||
if b2.State != StateUp {
|
||
t.Errorf("after alternating: want up, got %s", b2.State)
|
||
}
|
||
}
|
||
|
||
// TestNextInterval: correct interval selection based on counter state.
|
||
func TestNextInterval(t *testing.T) {
|
||
interval := 2 * time.Second
|
||
fast := 500 * time.Millisecond
|
||
down := 30 * time.Second
|
||
|
||
b := New("test", net.ParseIP("10.0.0.1"), 2, 3) // max=4
|
||
|
||
// Unknown: use fast-interval to establish state quickly.
|
||
if got := b.NextInterval(interval, fast, down); got != fast {
|
||
t.Errorf("StateUnknown with fast: got %v, want %v (fast)", got, fast)
|
||
}
|
||
// Unknown, no fast-interval configured: fall back to interval.
|
||
if got := b.NextInterval(interval, 0, down); got != interval {
|
||
t.Errorf("StateUnknown without fast: got %v, want %v (interval)", got, interval)
|
||
}
|
||
|
||
// After first fail: counter=0, state=Down → downInterval.
|
||
b.Record(ProbeResult{OK: false, Code: "L4CON"}, 5)
|
||
if b.State != StateDown {
|
||
t.Fatalf("expected StateDown after first fail, got %s", b.State)
|
||
}
|
||
if got := b.NextInterval(interval, fast, down); got != down {
|
||
t.Errorf("StateDown/counter=0: got %v, want %v (down)", got, down)
|
||
}
|
||
|
||
// Drive to max (fully healthy) → interval.
|
||
b.Counter.Health = b.Counter.Max()
|
||
if got := b.NextInterval(interval, fast, down); got != interval {
|
||
t.Errorf("counter=max: got %v, want %v (interval)", got, interval)
|
||
}
|
||
|
||
// Degraded (0 < counter < max) → fastInterval.
|
||
b.Counter.Health = 1
|
||
if got := b.NextInterval(interval, fast, down); got != fast {
|
||
t.Errorf("counter=1 (degraded): got %v, want %v (fast)", got, fast)
|
||
}
|
||
|
||
// No fastInterval configured → falls back to interval.
|
||
if got := b.NextInterval(interval, 0, down); got != interval {
|
||
t.Errorf("degraded, no fast: got %v, want %v (interval)", got, interval)
|
||
}
|
||
|
||
// No downInterval configured → falls back to interval.
|
||
b.Counter.Health = 0
|
||
if got := b.NextInterval(interval, fast, 0); got != interval {
|
||
t.Errorf("down, no downInterval: got %v, want %v (interval)", got, interval)
|
||
}
|
||
}
|
||
|
||
func TestPauseResume(t *testing.T) {
|
||
b := newBackend()
|
||
b.State = StateUp
|
||
|
||
changed := b.Pause(5)
|
||
if !changed {
|
||
t.Error("Pause should return true")
|
||
}
|
||
if b.State != StatePaused {
|
||
t.Errorf("after Pause: got %s, want paused", b.State)
|
||
}
|
||
|
||
// Probes ignored while paused.
|
||
if b.Record(pass(), 5) {
|
||
t.Error("Record(pass) should not transition while paused")
|
||
}
|
||
if b.Record(fail(), 5) {
|
||
t.Error("Record(fail) should not transition while paused")
|
||
}
|
||
if b.State != StatePaused {
|
||
t.Errorf("state changed while paused: %s", b.State)
|
||
}
|
||
|
||
// Second Pause is a no-op.
|
||
if b.Pause(5) {
|
||
t.Error("second Pause should return false")
|
||
}
|
||
|
||
changed = b.Resume(5)
|
||
if !changed {
|
||
t.Error("Resume should return true")
|
||
}
|
||
if b.State != StateUnknown {
|
||
t.Errorf("after Resume: got %s, want unknown", b.State)
|
||
}
|
||
|
||
// Resume on non-paused is a no-op.
|
||
if b.Resume(5) {
|
||
t.Error("Resume on non-paused should return false")
|
||
}
|
||
}
|
||
|
||
func TestTransitionHistory(t *testing.T) {
|
||
b := newBackend()
|
||
maxHistory := 3
|
||
|
||
// Drive several state changes. Each cycle: pass×2→Up, fail→Down (Unknown→Down on first fail).
|
||
b.Record(fail(), maxHistory) // Unknown→Down
|
||
b.Record(pass(), maxHistory) // counter++
|
||
b.Record(pass(), maxHistory) // Down→Up
|
||
b.Record(fail(), maxHistory) // Up: counter drops
|
||
b.Record(fail(), maxHistory) // Up: counter drops
|
||
b.Record(fail(), maxHistory) // Up→Down
|
||
b.Record(pass(), maxHistory) // counter++
|
||
b.Record(pass(), maxHistory) // Down→Up
|
||
|
||
if len(b.Transitions) != maxHistory {
|
||
t.Errorf("transitions capped at %d, got %d", maxHistory, len(b.Transitions))
|
||
}
|
||
// Newest first: last transition was →Up.
|
||
if b.Transitions[0].To != StateUp {
|
||
t.Errorf("newest transition: got %s, want up", b.Transitions[0].To)
|
||
}
|
||
// Transitions carry ProbeResult.
|
||
if b.Transitions[0].Result.Code == "" {
|
||
t.Error("transition result code should not be empty")
|
||
}
|
||
}
|
||
|
||
func TestTransitionTimestamp(t *testing.T) {
|
||
b := newBackend()
|
||
before := time.Now()
|
||
b.Record(fail(), 5)
|
||
after := time.Now()
|
||
|
||
if len(b.Transitions) == 0 {
|
||
t.Fatal("expected a transition")
|
||
}
|
||
ts := b.Transitions[0].At
|
||
if ts.Before(before) || ts.After(after) {
|
||
t.Errorf("transition timestamp %v outside [%v, %v]", ts, before, after)
|
||
}
|
||
}
|
||
|
||
func TestStartRemove(t *testing.T) {
|
||
b := newBackend()
|
||
|
||
// Start records an unknown→unknown transition.
|
||
tr := b.Start(5)
|
||
if tr.From != StateUnknown || tr.To != StateUnknown {
|
||
t.Errorf("Start transition: got %s→%s, want unknown→unknown", tr.From, tr.To)
|
||
}
|
||
if len(b.Transitions) != 1 {
|
||
t.Errorf("transitions after Start: got %d, want 1", len(b.Transitions))
|
||
}
|
||
if b.State != StateUnknown {
|
||
t.Errorf("state after Start: got %s, want unknown", b.State)
|
||
}
|
||
|
||
// Remove transitions to StateRemoved.
|
||
b.State = StateUp
|
||
tr = b.Remove(5)
|
||
if tr.From != StateUp || tr.To != StateRemoved {
|
||
t.Errorf("Remove transition: got %s→%s, want up→removed", tr.From, tr.To)
|
||
}
|
||
if b.State != StateRemoved {
|
||
t.Errorf("state after Remove: got %s, want removed", b.State)
|
||
}
|
||
|
||
// Record is a no-op once removed.
|
||
if b.Record(pass(), 5) {
|
||
t.Error("Record should not transition a removed backend")
|
||
}
|
||
if b.State != StateRemoved {
|
||
t.Errorf("state changed after Record on removed backend: %s", b.State)
|
||
}
|
||
}
|
||
|
||
func TestStateString(t *testing.T) {
|
||
cases := []struct {
|
||
s State
|
||
want string
|
||
}{
|
||
{StateUnknown, "unknown"},
|
||
{StateUp, "up"},
|
||
{StateDown, "down"},
|
||
{StatePaused, "paused"},
|
||
{StateRemoved, "removed"},
|
||
}
|
||
for _, c := range cases {
|
||
if c.s.String() != c.want {
|
||
t.Errorf("State(%d).String() = %q, want %q", c.s, c.s.String(), c.want)
|
||
}
|
||
}
|
||
}
|