Initial revision of healthchecker, inspired by HAProxy

This commit is contained in:
2026-04-10 17:30:44 +02:00
commit b84b3274b1
24 changed files with 4400 additions and 0 deletions

View File

@@ -0,0 +1,248 @@
package checker
import (
"context"
"net"
"testing"
"time"
"git.ipng.ch/ipng/vpp-maglev/internal/config"
"git.ipng.ch/ipng/vpp-maglev/internal/health"
)
// makeTestConfig returns a frontend configuration for tests containing a
// single VIP named "web" (192.0.2.1, tcp port 80) with one backend, 10.0.0.2.
// The VIP's ICMP health check uses the supplied interval, fall and rise values
// together with a fixed one-second timeout.
func makeTestConfig(interval time.Duration, fall, rise int) *config.Frontend {
	check := config.HealthCheck{
		Type:     "icmp",
		Interval: interval,
		Timeout:  time.Second,
		Fall:     fall,
		Rise:     rise,
	}
	web := config.VIP{
		Address:     net.ParseIP("192.0.2.1"),
		Protocol:    "tcp",
		Port:        80,
		Backends:    []net.IP{net.ParseIP("10.0.0.2")},
		HealthCheck: check,
	}
	return &config.Frontend{
		HealthCheckNetns: "test",
		HealthChecker:    config.HealthCheckerConfig{TransitionHistory: 5},
		VIPs:             map[string]config.VIP{"web": web},
	}
}
// TestHealthCheckEqual checks that healthCheckEqual reports a health check as
// equal to an unmodified copy of itself, and as unequal once Fall,
// FastInterval or the HTTP path differs from the baseline.
func TestHealthCheckEqual(t *testing.T) {
	base := config.HealthCheck{
		Type:     "http",
		Interval: time.Second,
		Timeout:  2 * time.Second,
		Fall:     3,
		Rise:     2,
		HTTP:     &config.HTTPParams{Path: "/healthz", ResponseCodeMin: 200, ResponseCodeMax: 200},
	}

	if same := base; !healthCheckEqual(base, same) {
		t.Error("identical configs should be equal")
	}

	// Each case starts from a fresh copy of base and changes exactly one field.
	cases := []struct {
		errMsg string
		mutate func(hc *config.HealthCheck)
	}{
		{
			errMsg: "different Fall should not be equal",
			mutate: func(hc *config.HealthCheck) { hc.Fall = 5 },
		},
		{
			errMsg: "different FastInterval should not be equal",
			mutate: func(hc *config.HealthCheck) { hc.FastInterval = 500 * time.Millisecond },
		},
		{
			errMsg: "different HTTP.Path should not be equal",
			mutate: func(hc *config.HealthCheck) {
				// Replace the pointer rather than writing through it, so base is untouched.
				hc.HTTP = &config.HTTPParams{Path: "/other", ResponseCodeMin: 200, ResponseCodeMax: 200}
			},
		},
	}
	for _, tc := range cases {
		changed := base
		tc.mutate(&changed)
		if healthCheckEqual(base, changed) {
			t.Error(tc.errMsg)
		}
	}
}
// TestStateMachineViaBackend drives a health.Backend directly (rise=2, fall=3),
// without any goroutines, and checks the transitions reported by Record:
// one failure moves the new backend to Down, and two passes then move it to Up.
func TestStateMachineViaBackend(t *testing.T) {
	backend := health.New("web", net.ParseIP("10.0.0.2"), 2, 3)
	okProbe := health.ProbeResult{OK: true, Layer: health.LayerL7, Code: "L7OK"}
	badProbe := health.ProbeResult{OK: false, Layer: health.LayerL4, Code: "L4CON"}

	// A single failure is enough to leave the initial state for Down.
	transitioned := backend.Record(badProbe, 5)
	if !transitioned {
		t.Error("first fail from Unknown should transition to Down")
	}
	if backend.State != health.StateDown {
		t.Errorf("expected down, got %s", backend.State)
	}

	// The first pass must not flip the state; the second one must (rise=2).
	transitioned = backend.Record(okProbe, 5)
	if transitioned {
		t.Error("should not transition after 1 pass (rise=2)")
	}
	transitioned = backend.Record(okProbe, 5)
	if !transitioned {
		t.Error("should transition to Up after 2 passes")
	}
	if backend.State != health.StateUp {
		t.Errorf("expected up, got %s", backend.State)
	}
}
// TestStaggerDelay checks staggerDelay for a 10s interval: position 0 of 10
// and position 0 of 1 both yield no delay, and position 5 of 10 yields 5s.
func TestStaggerDelay(t *testing.T) {
	const interval = 10 * time.Second

	first := staggerDelay(interval, 0, 10)
	if first != 0 {
		t.Errorf("pos=0: got %v, want 0", first)
	}

	middle := staggerDelay(interval, 5, 10)
	if middle != 5*time.Second {
		t.Errorf("pos=5/10: got %v, want 5s", middle)
	}

	only := staggerDelay(interval, 0, 1)
	if only != 0 {
		t.Errorf("total=1: got %v, want 0", only)
	}
}
func TestReloadAddsBackend(t *testing.T) {
cfg := makeTestConfig(10*time.Millisecond, 3, 2)
c := New(cfg)
newCfg := makeTestConfig(10*time.Millisecond, 3, 2)
newCfg.VIPs["web2"] = config.VIP{
Address: net.ParseIP("192.0.2.2"),
Protocol: "tcp",
Port: 443,
Backends: []net.IP{net.ParseIP("10.0.0.3")},
HealthCheck: config.HealthCheck{
Type: "icmp",
Interval: 10 * time.Millisecond,
Timeout: time.Second,
Fall: 3,
Rise: 2,
},
}
// Cancelled context: no probe goroutines actually run.
ctx, cancel := context.WithCancel(context.Background())
cancel()
if err := c.Reload(ctx, newCfg); err != nil {
t.Fatalf("Reload: %v", err)
}
c.mu.RLock()
_, ok := c.workers[backendKey{VIPName: "web2", Backend: "10.0.0.3"}]
c.mu.RUnlock()
if !ok {
t.Error("new backend not added after Reload")
}
}
// TestReloadRemovesBackend seeds a worker for the "web" VIP's backend by hand,
// reloads the checker with a configuration that has no VIPs, and verifies the
// worker is no longer present in the worker map.
func TestReloadRemovesBackend(t *testing.T) {
	cfg := makeTestConfig(10*time.Millisecond, 3, 2)
	c := New(cfg)

	// Reload is handed an already-cancelled context.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	// Seed a worker manually. Only the cancel func is needed; the derived
	// context itself is never observed by the test, so it is discarded.
	key := backendKey{VIPName: "web", Backend: "10.0.0.2"}
	_, wCancel := context.WithCancel(context.Background())
	// Release the worker context even if Reload fails before cancelling it;
	// calling a CancelFunc more than once is harmless.
	defer wCancel()

	c.mu.Lock()
	c.workers[key] = &worker{
		backend: health.New("web", net.ParseIP("10.0.0.2"), 2, 3),
		hc:      cfg.VIPs["web"].HealthCheck,
		vip:     cfg.VIPs["web"],
		cancel:  wCancel,
	}
	c.mu.Unlock()

	// New config with "web" VIP removed.
	newCfg := &config.Frontend{
		HealthCheckNetns: cfg.HealthCheckNetns,
		HealthChecker:    cfg.HealthChecker,
		VIPs:             map[string]config.VIP{},
	}
	if err := c.Reload(ctx, newCfg); err != nil {
		t.Fatalf("Reload: %v", err)
	}

	c.mu.RLock()
	_, ok := c.workers[key]
	c.mu.RUnlock()
	if ok {
		t.Error("removed backend still present after Reload")
	}
}
func TestSubscribe(t *testing.T) {
cfg := makeTestConfig(10*time.Millisecond, 1, 1)
c := New(cfg)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go c.fanOut(ctx)
ch, unsub := c.Subscribe()
defer unsub()
e := Event{
VIPName: "web",
Backend: net.ParseIP("10.0.0.2"),
Transition: health.Transition{
From: health.StateUnknown,
To: health.StateUp,
},
}
c.mu.Lock()
c.emit(e)
c.mu.Unlock()
select {
case got := <-ch:
if got.VIPName != "web" {
t.Errorf("event VIPName: got %q, want %q", got.VIPName, "web")
}
if got.Transition.To != health.StateUp {
t.Errorf("event To state: got %s, want up", got.Transition.To)
}
case <-time.After(time.Second):
t.Error("timed out waiting for event")
}
}
// TestPauseResume seeds a worker for the "web" VIP's backend, then checks that
// PauseBackend finds it and reports the Paused state, and that ResumeBackend
// finds it and reports the Unknown state.
func TestPauseResume(t *testing.T) {
	cfg := makeTestConfig(time.Hour, 3, 2) // long interval so probes never fire
	checker := New(cfg)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	go checker.fanOut(ctx)

	// Build the worker first, then install it under the lock. Its context is
	// derived from ctx, so the deferred cancel above also tears it down.
	_, wCancel := context.WithCancel(ctx)
	seeded := &worker{
		backend: health.New("web", net.ParseIP("10.0.0.2"), 2, 3),
		hc:      cfg.VIPs["web"].HealthCheck,
		vip:     cfg.VIPs["web"],
		cancel:  wCancel,
	}
	checker.mu.Lock()
	checker.workers[backendKey{VIPName: "web", Backend: "10.0.0.2"}] = seeded
	checker.mu.Unlock()

	paused, found := checker.PauseBackend("web", "10.0.0.2")
	if !found {
		t.Fatal("PauseBackend: not found")
	}
	if paused.State != health.StatePaused {
		t.Errorf("after pause: %s", paused.State)
	}

	resumed, found := checker.ResumeBackend("web", "10.0.0.2")
	if !found {
		t.Fatal("ResumeBackend: not found")
	}
	if resumed.State != health.StateUnknown {
		t.Errorf("after resume: %s", resumed.State)
	}
}