Files
vpp-maglev/internal/health/state_test.go

308 lines
9.6 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package health
import (
"net"
"testing"
"time"
)
// newBackend returns the fixture most tests in this file start from: a backend
// named "web4" at 10.0.0.1 built with rise=2 and fall=3.
func newBackend() *Backend {
	const (
		rise = 2
		fall = 3
	)
	addr := net.ParseIP("10.0.0.1")
	return New("web4", addr, rise, fall)
}
// pass returns a successful probe result tagged LayerL7 with code "L7OK".
func pass() ProbeResult {
	return ProbeResult{
		OK:    true,
		Layer: LayerL7,
		Code:  "L7OK",
	}
}
// fail returns an unsuccessful probe result tagged LayerL4 with code "L4CON".
func fail() ProbeResult {
	return ProbeResult{
		OK:    false,
		Layer: LayerL4,
		Code:  "L4CON",
	}
}
// TestInitialState verifies that a freshly constructed backend reports
// StateUnknown, has an empty transition history, and a zero health counter.
func TestInitialState(t *testing.T) {
	backend := newBackend()

	if got := backend.State; got != StateUnknown {
		t.Errorf("initial state: got %s, want unknown", got)
	}
	if n := len(backend.Transitions); n != 0 {
		t.Errorf("initial transitions: got %d, want 0", n)
	}
	if health := backend.Counter.Health; health != 0 {
		t.Errorf("initial counter health: got %d, want 0", health)
	}
}
// TestRiseToUp checks that a backend starting in Unknown or Down is reported Up
// only after rise=2 consecutive passing probes.
func TestRiseToUp(t *testing.T) {
	starts := []struct {
		label string
		from  State
	}{
		{label: "from unknown", from: StateUnknown},
		{label: "from down", from: StateDown},
	}

	for _, start := range starts {
		t.Run(start.label, func(t *testing.T) {
			backend := newBackend()
			backend.State = start.from

			// Pass #1 brings the counter to 1, which is below rise=2, so the
			// state must stay where it started.
			transitioned := backend.Record(pass(), 5)
			if transitioned {
				t.Error("should not transition after 1 pass (rise=2)")
			}
			if got := backend.State; got != start.from {
				t.Errorf("state changed early: got %s", got)
			}

			// Pass #2 brings the counter to rise, which flips the backend to Up.
			transitioned = backend.Record(pass(), 5)
			if !transitioned {
				t.Error("should transition to Up after 2 passes")
			}
			if got := backend.State; got != StateUp {
				t.Errorf("state: got %s, want up", got)
			}
		})
	}
}
// TestFallToDown checks that a fully healthy backend (counter at max) goes Down
// only after fall=3 consecutive failures.
//
// The fall guarantee is measured from counter=max. A backend that has only just
// turned Up sits at counter=rise, the bottom of the UP range, so one failure is
// enough to push it back into the DOWN range. That is intended: hysteresis
// shields a backend with a long healthy streak, not one that barely cleared the
// rise threshold.
func TestFallToDown(t *testing.T) {
	backend := newBackend() // rise=2, fall=3, max=4

	// Saturate the counter with Max() consecutive passes.
	for n := 0; n < backend.Counter.Max(); n++ {
		backend.Record(pass(), 5)
	}
	if backend.State != StateUp {
		t.Fatalf("precondition: want up, got %s", backend.State)
	}
	if backend.Counter.Health != backend.Counter.Max() {
		t.Fatalf("precondition: want counter=%d, got %d", backend.Counter.Max(), backend.Counter.Health)
	}

	// The first fall-1=2 failures take the counter 4->3->2, which is still
	// inside the UP range (>= rise=2), so neither may report a transition.
	earlyFailures := []string{
		"should not transition after 1 fail from fully healthy",
		"should not transition after 2 fails from fully healthy",
	}
	for _, msg := range earlyFailures {
		if backend.Record(fail(), 5) {
			t.Error(msg)
		}
	}
	if got := backend.State; got != StateUp {
		t.Errorf("state after 2 fails: got %s, want up", got)
	}

	// The third failure takes the counter 2->1, below rise, so the backend
	// must now go Down.
	if transitioned := backend.Record(fail(), 5); !transitioned {
		t.Error("should transition to Down after fall=3 failures from fully healthy")
	}
	if got := backend.State; got != StateDown {
		t.Errorf("state: got %s, want down", got)
	}
}
// TestUnknownToDownOnFirstFail checks that a backend still in Unknown goes Down
// on the very first failed probe, without waiting for further failures.
func TestUnknownToDownOnFirstFail(t *testing.T) {
	backend := newBackend()

	transitioned := backend.Record(fail(), 5)
	if !transitioned {
		t.Error("first fail from Unknown should transition to Down")
	}
	if got := backend.State; got != StateDown {
		t.Errorf("state: got %s, want down", got)
	}
}
// TestHysteresis: alternating pass/fail keeps backend in degraded range without
// bouncing between Up and Down. This is the key HAProxy counter property.
func TestHysteresis(t *testing.T) {
b := newBackend()
// Drive to Up.
b.Record(pass(), 5)
b.Record(pass(), 5) // counter=2, state=Up
// Alternate pass/fail. Counter oscillates 3↔2 (both in UP range for rise=2),
// or 2↔1 (crossing the boundary). Let's trace:
// Start: counter=2 (just became Up, was at 2=rise after second pass, then RecordPass incremented to 3... wait)
// Actually: after first pass from Unknown (counter=0), counter=1. After second pass, counter=2=rise, RecordPass returns true → Up.
// But RecordPass increments BEFORE checking: wasUp=false, counter becomes 2, IsUp()=true → returns true.
// So after second pass: counter=2, state=Up.
// max = rise+fall-1 = 2+3-1 = 4.
// fail: counter=1 < rise=2 → RecordFail: wasDown=false (counter was 2=rise, IsUp=true),
// counter becomes 1, IsUp()=false → returns true → Down!
// Hmm, so one fail from counter=2 (barely Up) → Down? That's with rise=2.
// The hysteresis is more visible with rise=2, fall=5: max=6.
// Let's use a backend with more headroom.
b2 := New("test", net.ParseIP("10.0.0.2"), 2, 5) // rise=2, fall=5, max=6
// Drive to fully healthy.
b2.Record(pass(), 5) // counter=1
b2.Record(pass(), 5) // counter=2=rise → Up
b2.Record(pass(), 5) // counter=3
b2.Record(pass(), 5) // counter=4
b2.Record(pass(), 5) // counter=5
b2.Record(pass(), 5) // counter=6=max
// Now alternate: fail drops from 6, pass brings back up.
// Should not transition since counter stays in UP range (>=2).
for i := 0; i < 4; i++ {
transitioned := b2.Record(fail(), 5) // 6→5→4→3→2 (all >=rise=2)
if transitioned {
t.Errorf("fail %d: should not transition (counter in UP range)", i+1)
}
if !b2.Counter.IsUp() {
t.Errorf("fail %d: should still be up", i+1)
}
if b2.Record(pass(), 5) { // re-increment
t.Errorf("pass %d: should not transition (already Up)", i+1)
}
}
if b2.State != StateUp {
t.Errorf("after alternating: want up, got %s", b2.State)
}
}
// TestNextInterval checks which of the three configured intervals NextInterval
// returns as the backend's state and health counter change, including the
// fallbacks when the fast or down interval is left at zero.
func TestNextInterval(t *testing.T) {
	const (
		interval = 2 * time.Second
		fast     = 500 * time.Millisecond
		down     = 30 * time.Second
	)
	backend := New("test", net.ParseIP("10.0.0.1"), 2, 3) // max=4

	// Before any probe the backend is Unknown: the regular interval applies,
	// never the down interval.
	got := backend.NextInterval(interval, fast, down)
	if got != interval {
		t.Errorf("StateUnknown: got %v, want %v (interval)", got, interval)
	}

	// One failure leaves the counter at 0 and moves the backend to Down, which
	// selects the down interval.
	backend.Record(ProbeResult{OK: false, Code: "L4CON"}, 5)
	if backend.State != StateDown {
		t.Fatalf("expected StateDown after first fail, got %s", backend.State)
	}
	got = backend.NextInterval(interval, fast, down)
	if got != down {
		t.Errorf("StateDown/counter=0: got %v, want %v (down)", got, down)
	}

	// Counter forced to max (fully healthy): back to the regular interval.
	backend.Counter.Health = backend.Counter.Max()
	got = backend.NextInterval(interval, fast, down)
	if got != interval {
		t.Errorf("counter=max: got %v, want %v (interval)", got, interval)
	}

	// Counter strictly between 0 and max (degraded): the fast interval.
	backend.Counter.Health = 1
	got = backend.NextInterval(interval, fast, down)
	if got != fast {
		t.Errorf("counter=1 (degraded): got %v, want %v (fast)", got, fast)
	}

	// Degraded with no fast interval configured: fall back to the regular one.
	got = backend.NextInterval(interval, 0, down)
	if got != interval {
		t.Errorf("degraded, no fast: got %v, want %v (interval)", got, interval)
	}

	// Counter at 0 with no down interval configured: fall back to the regular one.
	backend.Counter.Health = 0
	got = backend.NextInterval(interval, fast, 0)
	if got != interval {
		t.Errorf("down, no downInterval: got %v, want %v (interval)", got, interval)
	}
}
// TestPauseResume walks a backend through Pause and Resume and checks that each
// call reports whether it changed anything, that probe results recorded while
// paused cause no transition, and that Resume puts the backend back in Unknown.
func TestPauseResume(t *testing.T) {
	backend := newBackend()
	backend.State = StateUp

	// Pausing an Up backend is a real change.
	if paused := backend.Pause(5); !paused {
		t.Error("Pause should return true")
	}
	if got := backend.State; got != StatePaused {
		t.Errorf("after Pause: got %s, want paused", got)
	}

	// Neither a pass nor a fail may move a paused backend.
	if transitioned := backend.Record(pass(), 5); transitioned {
		t.Error("Record(pass) should not transition while paused")
	}
	if transitioned := backend.Record(fail(), 5); transitioned {
		t.Error("Record(fail) should not transition while paused")
	}
	if got := backend.State; got != StatePaused {
		t.Errorf("state changed while paused: %s", got)
	}

	// Pausing again changes nothing.
	if pausedAgain := backend.Pause(5); pausedAgain {
		t.Error("second Pause should return false")
	}

	// Resuming a paused backend is a real change and lands in Unknown.
	if resumed := backend.Resume(5); !resumed {
		t.Error("Resume should return true")
	}
	if got := backend.State; got != StateUnknown {
		t.Errorf("after Resume: got %s, want unknown", got)
	}

	// Resuming a backend that is not paused changes nothing.
	if resumedAgain := backend.Resume(5); resumedAgain {
		t.Error("Resume on non-paused should return false")
	}
}
// TestTransitionHistory checks that the transition history is capped at the
// maxHistory value passed to Record, is ordered newest-first, and keeps the
// probe result that triggered each transition.
func TestTransitionHistory(t *testing.T) {
	b := newBackend() // rise=2, fall=3
	const maxHistory = 3

	// Produce more than maxHistory state changes. Expected trace: the backend
	// only ever reaches counter=rise here, so (as documented on TestFallToDown)
	// the first failure after becoming Up already takes it back Down.
	b.Record(fail(), maxHistory) // Unknown -> Down
	b.Record(pass(), maxHistory) // counter below rise, still Down
	b.Record(pass(), maxHistory) // counter reaches rise: Down -> Up
	b.Record(fail(), maxHistory) // counter drops below rise: Up -> Down
	b.Record(fail(), maxHistory) // still Down
	b.Record(fail(), maxHistory) // still Down
	b.Record(pass(), maxHistory) // counter below rise, still Down
	b.Record(pass(), maxHistory) // counter reaches rise: Down -> Up

	// Fatal rather than Error: the checks below index Transitions[0], which
	// would panic on an empty history and hide the real failure.
	if len(b.Transitions) != maxHistory {
		t.Fatalf("transitions capped at %d, got %d", maxHistory, len(b.Transitions))
	}

	// Newest first: the last transition was to Up.
	newest := b.Transitions[0]
	if newest.To != StateUp {
		t.Errorf("newest transition: got %s, want up", newest.To)
	}
	// Each transition carries the ProbeResult that caused it.
	if newest.Result.Code == "" {
		t.Error("transition result code should not be empty")
	}
}
// TestTransitionTimestamp checks that the timestamp stored on a transition
// falls between the wall-clock readings taken just before and just after the
// Record call that produced it.
func TestTransitionTimestamp(t *testing.T) {
	backend := newBackend()

	lower := time.Now()
	backend.Record(fail(), 5)
	upper := time.Now()

	if len(backend.Transitions) == 0 {
		t.Fatal("expected a transition")
	}

	stamp := backend.Transitions[0].At
	inWindow := !stamp.Before(lower) && !stamp.After(upper)
	if !inWindow {
		t.Errorf("transition timestamp %v outside [%v, %v]", stamp, lower, upper)
	}
}
// TestStateString pins the text returned by String for each State value.
func TestStateString(t *testing.T) {
	expected := []struct {
		state State
		text  string
	}{
		{state: StateUnknown, text: "unknown"},
		{state: StateUp, text: "up"},
		{state: StateDown, text: "down"},
		{state: StatePaused, text: "paused"},
	}

	for i := range expected {
		tc := expected[i]
		got := tc.state.String()
		if got == tc.text {
			continue
		}
		t.Errorf("State(%d).String() = %q, want %q", tc.state, got, tc.text)
	}
}