Files
vpp-maglev/internal/checker/checker_test.go
Pim van Pelt 8bde00eb61 Fix pause to cancel probe goroutine; add Robot Framework integration tests
Pause semantics
- PauseBackend now cancels the probe goroutine so no HTTP/TCP/ICMP
  traffic is sent while the backend is paused. Previously the goroutine
  kept running and results were silently discarded.
- ResumeBackend launches a fresh probe goroutine on the existing worker,
  preserving transition history. The backend re-enters unknown state.

Integration tests (tests/01-maglevd/)
- Containerlab topology with 3 nginx:alpine backends on a dedicated
  management network (172.20.30.0/24) with static IPs.
- maglevd config with 200ms HTTP health-check interval for fast test
  convergence (rise=2, fall=2).
- 8 test cases: deploy lab, start maglevd, all backends reach up,
  nginx logs confirm probes arriving, pause stops probes (probe count
  stable), resume restarts probes, disable stops probes, enable
  restarts probes.

VPP dataplane test (tests/02-vpp-lb/)
- Rewrite 01-e2e-lab.robot to match the actual single-VPP topology:
  test client-to-server ping through VPP bridge domains and verify
  nginx is serving on all app servers. The previous version referenced
  a non-existent topology file and tested OSPF/BFD between two VPP
  nodes that don't exist in this lab.

Build infrastructure
- Add 'make robot-test' target with TEST= for suite selection.
- Add tests/.venv target for Robot Framework virtualenv.
- Make IMAGE optional in rf-run.sh.
- Add .gitignore entries for test output, venv, logs, and clab state.
2026-04-11 20:19:36 +02:00

377 lines
9.3 KiB
Go

// Copyright (c) 2026, Pim van Pelt <pim@ipng.ch>
package checker
import (
"context"
"net"
"testing"
"time"
"git.ipng.ch/ipng/vpp-maglev/internal/config"
"git.ipng.ch/ipng/vpp-maglev/internal/health"
)
// makeTestConfig returns a minimal configuration with one ICMP health check
// ("icmp"), one enabled backend ("be0") and one frontend ("web") whose single
// pool references that backend. interval/fall/rise parameterize the check.
func makeTestConfig(interval time.Duration, fall, rise int) *config.Config {
	check := config.HealthCheck{
		Type:     "icmp",
		Interval: interval,
		Timeout:  time.Second,
		Fall:     fall,
		Rise:     rise,
	}
	backend := config.Backend{
		Address:     net.ParseIP("10.0.0.2"),
		HealthCheck: "icmp",
		Enabled:     true,
	}
	frontend := config.Frontend{
		Address:  net.ParseIP("192.0.2.1"),
		Protocol: "tcp",
		Port:     80,
		Pools: []config.Pool{
			{Name: "primary", Backends: map[string]config.PoolBackend{
				"be0": {Weight: 100},
			}},
		},
	}
	return &config.Config{
		HealthChecker: config.HealthCheckerConfig{TransitionHistory: 5},
		HealthChecks:  map[string]config.HealthCheck{"icmp": check},
		Backends:      map[string]config.Backend{"be0": backend},
		Frontends:     map[string]config.Frontend{"web": frontend},
	}
}
// TestHealthCheckEqual checks that healthCheckEqual treats an identical copy
// as equal and reports inequality when any single field is mutated.
func TestHealthCheckEqual(t *testing.T) {
	base := config.HealthCheck{
		Type:     "http",
		Interval: time.Second,
		Timeout:  2 * time.Second,
		Fall:     3,
		Rise:     2,
		HTTP:     &config.HTTPParams{Path: "/healthz", ResponseCodeMin: 200, ResponseCodeMax: 200},
	}
	if !healthCheckEqual(base, base) {
		t.Error("identical configs should be equal")
	}
	// Each case mutates one field of a fresh copy; equality must break.
	mutations := []struct {
		msg    string
		mutate func(*config.HealthCheck)
	}{
		{"different Fall should not be equal", func(h *config.HealthCheck) {
			h.Fall = 5
		}},
		{"different FastInterval should not be equal", func(h *config.HealthCheck) {
			h.FastInterval = 500 * time.Millisecond
		}},
		{"different HTTP.Path should not be equal", func(h *config.HealthCheck) {
			h.HTTP = &config.HTTPParams{Path: "/other", ResponseCodeMin: 200, ResponseCodeMax: 200}
		}},
	}
	for _, m := range mutations {
		other := base
		m.mutate(&other)
		if healthCheckEqual(base, other) {
			t.Error(m.msg)
		}
	}
}
// TestStateMachineViaBackend drives a health.Backend (fall=2, rise=3 per the
// constructor args) through unknown→down→up using recorded probe results.
func TestStateMachineViaBackend(t *testing.T) {
	be := health.New("be0", net.ParseIP("10.0.0.2"), 2, 3)
	passResult := health.ProbeResult{OK: true, Layer: health.LayerL7, Code: "L7OK"}
	failResult := health.ProbeResult{OK: false, Layer: health.LayerL4, Code: "L4CON"}
	// From Unknown, a single failure transitions immediately to Down.
	if !be.Record(failResult, 5) {
		t.Error("first fail from Unknown should transition to Down")
	}
	if be.State != health.StateDown {
		t.Errorf("expected down, got %s", be.State)
	}
	// Rising back to Up requires two consecutive passes.
	if be.Record(passResult, 5) {
		t.Error("should not transition after 1 pass (rise=2)")
	}
	if !be.Record(passResult, 5) {
		t.Error("should transition to Up after 2 passes")
	}
	if be.State != health.StateUp {
		t.Errorf("expected up, got %s", be.State)
	}
}
// TestStaggerDelay checks the probe start-time staggering: position 0 and a
// total of 1 get no delay, and position 5 of 10 over a 10s interval gets 5s.
func TestStaggerDelay(t *testing.T) {
	interval := 10 * time.Second

	got := staggerDelay(interval, 0, 10)
	if got != 0 {
		t.Errorf("pos=0: got %v, want 0", got)
	}

	got = staggerDelay(interval, 5, 10)
	if got != 5*time.Second {
		t.Errorf("pos=5/10: got %v, want 5s", got)
	}

	got = staggerDelay(interval, 0, 1)
	if got != 0 {
		t.Errorf("total=1: got %v, want 0", got)
	}
}
func TestReloadAddsBackend(t *testing.T) {
cfg := makeTestConfig(10*time.Millisecond, 3, 2)
c := New(cfg)
newCfg := makeTestConfig(10*time.Millisecond, 3, 2)
newCfg.Backends["be1"] = config.Backend{
Address: net.ParseIP("10.0.0.3"),
HealthCheck: "icmp",
Enabled: true,
}
newCfg.Frontends["web2"] = config.Frontend{
Address: net.ParseIP("192.0.2.2"),
Protocol: "tcp",
Port: 443,
Pools: []config.Pool{
{Name: "primary", Backends: map[string]config.PoolBackend{
"be1": {Weight: 100},
}},
},
}
ctx, cancel := context.WithCancel(context.Background())
cancel()
if err := c.Reload(ctx, newCfg); err != nil {
t.Fatalf("Reload: %v", err)
}
c.mu.RLock()
_, ok := c.workers["be1"]
c.mu.RUnlock()
if !ok {
t.Error("new backend not added after Reload")
}
}
// TestReloadRemovesBackend verifies that Reload tears down the worker for a
// backend that no frontend pool references anymore.
func TestReloadRemovesBackend(t *testing.T) {
	cfg := makeTestConfig(10*time.Millisecond, 3, 2)
	c := New(cfg)
	// Pre-cancelled context: Reload bookkeeping runs, probes never start.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()
	// Seed a worker manually. Only the cancel func is needed; the derived
	// context itself is unused here (the probe goroutine is never started).
	c.mu.Lock()
	_, wCancel := context.WithCancel(context.Background())
	c.workers["be0"] = &worker{
		backend: health.New("be0", net.ParseIP("10.0.0.2"), 2, 3),
		hc:      cfg.HealthChecks["icmp"],
		entry:   cfg.Backends["be0"],
		cancel:  wCancel,
	}
	c.mu.Unlock()
	// Remove all frontends → be0 is no longer active.
	newCfg := &config.Config{
		HealthChecker: cfg.HealthChecker,
		HealthChecks:  cfg.HealthChecks,
		Backends:      cfg.Backends,
		Frontends:     map[string]config.Frontend{},
	}
	if err := c.Reload(ctx, newCfg); err != nil {
		t.Fatalf("Reload: %v", err)
	}
	c.mu.RLock()
	_, ok := c.workers["be0"]
	c.mu.RUnlock()
	if ok {
		t.Error("removed backend still present after Reload")
	}
}
// TestSharedBackendProbedOnce checks that a backend referenced by multiple
// frontends is deduplicated: activeBackendNames yields it exactly once.
func TestSharedBackendProbedOnce(t *testing.T) {
	// be0 is referenced by two frontends — only one worker should exist.
	cfg := makeTestConfig(10*time.Millisecond, 3, 2)
	second := config.Frontend{
		Address:  net.ParseIP("192.0.2.3"),
		Protocol: "tcp",
		Port:     443,
		Pools: []config.Pool{
			{Name: "primary", Backends: map[string]config.PoolBackend{
				"be0": {Weight: 100},
			}},
		},
	}
	cfg.Frontends["web-tls"] = second

	c := New(cfg)
	names := activeBackendNames(c.cfg)
	if len(names) != 1 || names[0] != "be0" {
		t.Errorf("expected exactly one active backend, got %v", names)
	}
}
func TestSubscribe(t *testing.T) {
cfg := makeTestConfig(10*time.Millisecond, 1, 1)
c := New(cfg)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go c.fanOut(ctx)
ch, unsub := c.Subscribe()
defer unsub()
e := Event{
FrontendName: "web",
BackendName: "be0",
Backend: net.ParseIP("10.0.0.2"),
Transition: health.Transition{
From: health.StateUnknown,
To: health.StateUp,
},
}
c.mu.Lock()
c.emit(e)
c.mu.Unlock()
select {
case got := <-ch:
if got.FrontendName != "web" {
t.Errorf("event FrontendName: got %q, want web", got.FrontendName)
}
if got.BackendName != "be0" {
t.Errorf("event BackendName: got %q, want be0", got.BackendName)
}
if got.Transition.To != health.StateUp {
t.Errorf("event To state: got %s, want up", got.Transition.To)
}
case <-time.After(time.Second):
t.Error("timed out waiting for event")
}
}
func TestSetFrontendPoolBackendWeight(t *testing.T) {
cfg := makeTestConfig(time.Hour, 3, 2)
c := New(cfg)
// Valid weight change.
fe, err := c.SetFrontendPoolBackendWeight("web", "primary", "be0", 42)
if err != nil {
t.Fatalf("SetFrontendPoolBackendWeight: %v", err)
}
if fe.Pools[0].Backends["be0"].Weight != 42 {
t.Errorf("weight: got %d, want 42", fe.Pools[0].Backends["be0"].Weight)
}
// Persisted in live config.
got, _ := c.GetFrontend("web")
if got.Pools[0].Backends["be0"].Weight != 42 {
t.Errorf("config weight: got %d, want 42", got.Pools[0].Backends["be0"].Weight)
}
// Out-of-range weight.
if _, err := c.SetFrontendPoolBackendWeight("web", "primary", "be0", 101); err == nil {
t.Error("expected error for weight 101")
}
// Unknown frontend.
if _, err := c.SetFrontendPoolBackendWeight("nope", "primary", "be0", 50); err == nil {
t.Error("expected error for unknown frontend")
}
// Unknown pool.
if _, err := c.SetFrontendPoolBackendWeight("web", "nope", "be0", 50); err == nil {
t.Error("expected error for unknown pool")
}
// Unknown backend.
if _, err := c.SetFrontendPoolBackendWeight("web", "primary", "nope", 50); err == nil {
t.Error("expected error for unknown backend in pool")
}
}
// TestEnableDisable verifies DisableBackend moves the backend to the removed
// state with Enabled=false (while staying visible via GetBackend), and that
// EnableBackend brings it back to unknown with Enabled=true.
func TestEnableDisable(t *testing.T) {
	cfg := makeTestConfig(time.Hour, 3, 2)
	c := New(cfg)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	go c.fanOut(ctx)
	// Seed a worker as EnableBackend/DisableBackend require one in c.workers.
	// Only the cancel func is kept; the derived context itself is unused
	// because no probe goroutine is started in this test.
	c.mu.Lock()
	c.runCtx = ctx
	_, wCancel := context.WithCancel(ctx)
	c.workers["be0"] = &worker{
		backend: health.New("be0", net.ParseIP("10.0.0.2"), 2, 3),
		hc:      cfg.HealthChecks["icmp"],
		entry:   cfg.Backends["be0"],
		cancel:  wCancel,
	}
	c.mu.Unlock()
	b, ok := c.DisableBackend("be0")
	if !ok {
		t.Fatal("DisableBackend: not found")
	}
	if b.Health.State != health.StateRemoved {
		t.Errorf("after disable: state=%s, want removed", b.Health.State)
	}
	if b.Config.Enabled {
		t.Error("after disable: Enabled should be false")
	}
	// Backend should still be visible after disable.
	snap, ok := c.GetBackend("be0")
	if !ok {
		t.Fatal("GetBackend after disable: not found")
	}
	if snap.Config.Enabled {
		t.Error("GetBackend after disable: Enabled should be false")
	}
	b, ok = c.EnableBackend("be0")
	if !ok {
		t.Fatal("EnableBackend: not found")
	}
	if b.Health.State != health.StateUnknown {
		t.Errorf("after enable: state=%s, want unknown", b.Health.State)
	}
	if !b.Config.Enabled {
		t.Error("after enable: Enabled should be true")
	}
}
// TestPauseResume verifies PauseBackend moves a backend to paused and
// ResumeBackend returns it to unknown, using a manually seeded worker.
func TestPauseResume(t *testing.T) {
	cfg := makeTestConfig(time.Hour, 3, 2)
	c := New(cfg)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	go c.fanOut(ctx)

	// Seed a worker directly; only the cancel func and wake channel matter.
	_, wCancel := context.WithCancel(ctx)
	w := &worker{
		backend: health.New("be0", net.ParseIP("10.0.0.2"), 2, 3),
		hc:      cfg.HealthChecks["icmp"],
		entry:   cfg.Backends["be0"],
		cancel:  wCancel,
		wakeCh:  make(chan struct{}, 1),
	}
	c.mu.Lock()
	c.runCtx = ctx
	c.workers["be0"] = w
	c.mu.Unlock()

	snap, ok := c.PauseBackend("be0")
	if !ok {
		t.Fatal("PauseBackend: not found")
	}
	if snap.Health.State != health.StatePaused {
		t.Errorf("after pause: %s", snap.Health.State)
	}

	snap, ok = c.ResumeBackend("be0")
	if !ok {
		t.Fatal("ResumeBackend: not found")
	}
	if snap.Health.State != health.StateUnknown {
		t.Errorf("after resume: %s", snap.Health.State)
	}
}