- Replaced flat `backends: [...]` list on frontends with an ordered `pools:`
list; each pool has a name and a map of backends with per-pool weights (0–100,
default 100). Pools express priority: first pool with a healthy backend wins.
- Removed global backend weight (was on the backend, now lives in the pool).
- Config validation enforces non-empty pools, non-empty pool names, weight
range, and consistent address families across all pools of a frontend.
- Added `PoolBackendInfo { name, weight }` and changed `PoolInfo.backends` from
`repeated string` to `repeated PoolBackendInfo` so weights are visible over
the API.
- Full interactive shell with readline, tab completion, and `?` inline help.
- Command tree parser (Walk) handles fixed keywords and dynamic slot nodes;
prefix matching with exact-match priority.
- Commands: `show version/frontends/frontend/backends/backend/healthchecks/
healthcheck`, `set backend <name> pause|resume`, `quit`/`exit`.
- `show frontend` output is hierarchical (pools → backends) with per-backend
weights and `[disabled]` notation; pool section uses fixed-width formatting
so ANSI color codes don't corrupt tabwriter alignment.
- `-color` flag (default true) wraps static field labels in dark-blue ANSI;
works correctly with tabwriter because all labels carry identical-length
escape sequences.
- `cmd/version.go` package holds `version`, `commit`, `date` vars set at build
time via `-ldflags -X`.
- `make build` / `make build-amd64` / `make build-arm64` all inject
`VERSION=0.1.1`, `COMMIT_HASH` (from `git rev-parse --short HEAD`), and
`DATE` (UTC ISO-8601).
- `maglevc` prints version on interactive startup and exposes `show version`.
- `maglevd` logs version/commit/date at startup; `-version` flag prints and exits.
- `doHTTPProbe` was building a `https://` target URL even though TLS was already
applied to the connection inside `inNetns`. `http.Transport` then wrapped the
connection in a second TLS layer, producing "http: server gave HTTP response
to HTTPS client". Fixed by always using `http://` in the target URL.
- Added `TestHTTPSProbe` using `httptest.NewTLSServer` to cover the full path.
- New `docs/user-guide.md`: maglevd flags/signals, maglevc commands, shell
completion, and command-tree parser walkthrough.
- New `docs/healthchecks.md`: state machine, rise/fall model, probe intervals,
all transition events with log examples.
- Updated `docs/config-guide.md`: pools design, removed global weight from
backends, updated all examples.
- Updated `README.md`: packaging table, build paths, corrected binary locations
(`/usr/sbin/maglevd`), config filename (`.yaml`).
- `debian/` directory contains `control.in`, `maglevd.service`, `default.maglev`,
`maglev.yaml` (example config), `conffiles`, `postinst`, `prerm`.
- `debian/build-deb.sh` stages a package tree and calls `dpkg-deb`; emits
`build/vpp-maglev_<version>~<commit>_<arch>.deb`.
- Cross-compiles for amd64 and arm64 in one `make pkg-deb` invocation.
- `maglevd` installed to `/usr/sbin/`, `maglevc` to `/usr/bin/`.
- Service reads `MAGLEV_CONFIG` from `/etc/default/maglev`
(default: `/etc/maglev/maglev.yaml`).
- Man pages `maglevd(8)` and `maglevc(1)` live in `docs/` and are gzip'd into
the package.
- All build output goes to `build/<arch>/`; `build/` is gitignored.
283 lines
6.7 KiB
Go
// Copyright (c) 2026, Pim van Pelt <pim@ipng.ch>
|
|
|
|
package checker
|
|
|
|
import (
|
|
"context"
|
|
"net"
|
|
"testing"
|
|
"time"
|
|
|
|
"git.ipng.ch/ipng/vpp-maglev/internal/config"
|
|
"git.ipng.ch/ipng/vpp-maglev/internal/health"
|
|
)
|
|
|
|
func makeTestConfig(interval time.Duration, fall, rise int) *config.Config {
|
|
return &config.Config{
|
|
HealthChecker: config.HealthCheckerConfig{TransitionHistory: 5},
|
|
HealthChecks: map[string]config.HealthCheck{
|
|
"icmp": {
|
|
Type: "icmp",
|
|
Interval: interval,
|
|
Timeout: time.Second,
|
|
Fall: fall,
|
|
Rise: rise,
|
|
},
|
|
},
|
|
Backends: map[string]config.Backend{
|
|
"be0": {
|
|
Address: net.ParseIP("10.0.0.2"),
|
|
HealthCheck: "icmp",
|
|
Enabled: true,
|
|
},
|
|
},
|
|
Frontends: map[string]config.Frontend{
|
|
"web": {
|
|
Address: net.ParseIP("192.0.2.1"),
|
|
Protocol: "tcp",
|
|
Port: 80,
|
|
Pools: []config.Pool{
|
|
{Name: "primary", Backends: map[string]config.PoolBackend{
|
|
"be0": {Weight: 100},
|
|
}},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
func TestHealthCheckEqual(t *testing.T) {
|
|
a := config.HealthCheck{
|
|
Type: "http",
|
|
Interval: time.Second,
|
|
Timeout: 2 * time.Second,
|
|
Fall: 3,
|
|
Rise: 2,
|
|
HTTP: &config.HTTPParams{Path: "/healthz", ResponseCodeMin: 200, ResponseCodeMax: 200},
|
|
}
|
|
b := a
|
|
if !healthCheckEqual(a, b) {
|
|
t.Error("identical configs should be equal")
|
|
}
|
|
b.Fall = 5
|
|
if healthCheckEqual(a, b) {
|
|
t.Error("different Fall should not be equal")
|
|
}
|
|
b = a
|
|
b.FastInterval = 500 * time.Millisecond
|
|
if healthCheckEqual(a, b) {
|
|
t.Error("different FastInterval should not be equal")
|
|
}
|
|
b = a
|
|
b.HTTP = &config.HTTPParams{Path: "/other", ResponseCodeMin: 200, ResponseCodeMax: 200}
|
|
if healthCheckEqual(a, b) {
|
|
t.Error("different HTTP.Path should not be equal")
|
|
}
|
|
}
|
|
|
|
func TestStateMachineViaBackend(t *testing.T) {
|
|
b := health.New("be0", net.ParseIP("10.0.0.2"), 2, 3)
|
|
pass := health.ProbeResult{OK: true, Layer: health.LayerL7, Code: "L7OK"}
|
|
fail := health.ProbeResult{OK: false, Layer: health.LayerL4, Code: "L4CON"}
|
|
|
|
if !b.Record(fail, 5) {
|
|
t.Error("first fail from Unknown should transition to Down")
|
|
}
|
|
if b.State != health.StateDown {
|
|
t.Errorf("expected down, got %s", b.State)
|
|
}
|
|
if b.Record(pass, 5) {
|
|
t.Error("should not transition after 1 pass (rise=2)")
|
|
}
|
|
if !b.Record(pass, 5) {
|
|
t.Error("should transition to Up after 2 passes")
|
|
}
|
|
if b.State != health.StateUp {
|
|
t.Errorf("expected up, got %s", b.State)
|
|
}
|
|
}
|
|
|
|
func TestStaggerDelay(t *testing.T) {
|
|
interval := 10 * time.Second
|
|
if got := staggerDelay(interval, 0, 10); got != 0 {
|
|
t.Errorf("pos=0: got %v, want 0", got)
|
|
}
|
|
if got := staggerDelay(interval, 5, 10); got != 5*time.Second {
|
|
t.Errorf("pos=5/10: got %v, want 5s", got)
|
|
}
|
|
if got := staggerDelay(interval, 0, 1); got != 0 {
|
|
t.Errorf("total=1: got %v, want 0", got)
|
|
}
|
|
}
|
|
|
|
func TestReloadAddsBackend(t *testing.T) {
|
|
cfg := makeTestConfig(10*time.Millisecond, 3, 2)
|
|
c := New(cfg)
|
|
|
|
newCfg := makeTestConfig(10*time.Millisecond, 3, 2)
|
|
newCfg.Backends["be1"] = config.Backend{
|
|
Address: net.ParseIP("10.0.0.3"),
|
|
HealthCheck: "icmp",
|
|
Enabled: true,
|
|
}
|
|
newCfg.Frontends["web2"] = config.Frontend{
|
|
Address: net.ParseIP("192.0.2.2"),
|
|
Protocol: "tcp",
|
|
Port: 443,
|
|
Pools: []config.Pool{
|
|
{Name: "primary", Backends: map[string]config.PoolBackend{
|
|
"be1": {Weight: 100},
|
|
}},
|
|
},
|
|
}
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
if err := c.Reload(ctx, newCfg); err != nil {
|
|
t.Fatalf("Reload: %v", err)
|
|
}
|
|
|
|
c.mu.RLock()
|
|
_, ok := c.workers["be1"]
|
|
c.mu.RUnlock()
|
|
if !ok {
|
|
t.Error("new backend not added after Reload")
|
|
}
|
|
}
|
|
|
|
func TestReloadRemovesBackend(t *testing.T) {
|
|
cfg := makeTestConfig(10*time.Millisecond, 3, 2)
|
|
c := New(cfg)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
cancel()
|
|
|
|
// Seed a worker manually.
|
|
c.mu.Lock()
|
|
wCtx, wCancel := context.WithCancel(context.Background())
|
|
c.workers["be0"] = &worker{
|
|
backend: health.New("be0", net.ParseIP("10.0.0.2"), 2, 3),
|
|
hc: cfg.HealthChecks["icmp"],
|
|
entry: cfg.Backends["be0"],
|
|
cancel: wCancel,
|
|
}
|
|
c.mu.Unlock()
|
|
_ = wCtx
|
|
|
|
// Remove all frontends → be0 is no longer active.
|
|
newCfg := &config.Config{
|
|
HealthChecker: cfg.HealthChecker,
|
|
HealthChecks: cfg.HealthChecks,
|
|
Backends: cfg.Backends,
|
|
Frontends: map[string]config.Frontend{},
|
|
}
|
|
|
|
if err := c.Reload(ctx, newCfg); err != nil {
|
|
t.Fatalf("Reload: %v", err)
|
|
}
|
|
|
|
c.mu.RLock()
|
|
_, ok := c.workers["be0"]
|
|
c.mu.RUnlock()
|
|
if ok {
|
|
t.Error("removed backend still present after Reload")
|
|
}
|
|
}
|
|
|
|
func TestSharedBackendProbedOnce(t *testing.T) {
|
|
// be0 is referenced by two frontends — only one worker should exist.
|
|
cfg := makeTestConfig(10*time.Millisecond, 3, 2)
|
|
cfg.Frontends["web-tls"] = config.Frontend{
|
|
Address: net.ParseIP("192.0.2.3"),
|
|
Protocol: "tcp",
|
|
Port: 443,
|
|
Pools: []config.Pool{
|
|
{Name: "primary", Backends: map[string]config.PoolBackend{
|
|
"be0": {Weight: 100},
|
|
}},
|
|
},
|
|
}
|
|
|
|
c := New(cfg)
|
|
names := activeBackendNames(c.cfg)
|
|
if len(names) != 1 || names[0] != "be0" {
|
|
t.Errorf("expected exactly one active backend, got %v", names)
|
|
}
|
|
}
|
|
|
|
func TestSubscribe(t *testing.T) {
|
|
cfg := makeTestConfig(10*time.Millisecond, 1, 1)
|
|
c := New(cfg)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
go c.fanOut(ctx)
|
|
|
|
ch, unsub := c.Subscribe()
|
|
defer unsub()
|
|
|
|
e := Event{
|
|
FrontendName: "web",
|
|
BackendName: "be0",
|
|
Backend: net.ParseIP("10.0.0.2"),
|
|
Transition: health.Transition{
|
|
From: health.StateUnknown,
|
|
To: health.StateUp,
|
|
},
|
|
}
|
|
c.mu.Lock()
|
|
c.emit(e)
|
|
c.mu.Unlock()
|
|
|
|
select {
|
|
case got := <-ch:
|
|
if got.FrontendName != "web" {
|
|
t.Errorf("event FrontendName: got %q, want web", got.FrontendName)
|
|
}
|
|
if got.BackendName != "be0" {
|
|
t.Errorf("event BackendName: got %q, want be0", got.BackendName)
|
|
}
|
|
if got.Transition.To != health.StateUp {
|
|
t.Errorf("event To state: got %s, want up", got.Transition.To)
|
|
}
|
|
case <-time.After(time.Second):
|
|
t.Error("timed out waiting for event")
|
|
}
|
|
}
|
|
|
|
func TestPauseResume(t *testing.T) {
|
|
cfg := makeTestConfig(time.Hour, 3, 2)
|
|
c := New(cfg)
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
defer cancel()
|
|
go c.fanOut(ctx)
|
|
|
|
c.mu.Lock()
|
|
_, wCancel := context.WithCancel(ctx)
|
|
c.workers["be0"] = &worker{
|
|
backend: health.New("be0", net.ParseIP("10.0.0.2"), 2, 3),
|
|
hc: cfg.HealthChecks["icmp"],
|
|
entry: cfg.Backends["be0"],
|
|
cancel: wCancel,
|
|
}
|
|
c.mu.Unlock()
|
|
|
|
b, ok := c.PauseBackend("be0")
|
|
if !ok {
|
|
t.Fatal("PauseBackend: not found")
|
|
}
|
|
if b.Health.State != health.StatePaused {
|
|
t.Errorf("after pause: %s", b.Health.State)
|
|
}
|
|
|
|
b, ok = c.ResumeBackend("be0")
|
|
if !ok {
|
|
t.Fatal("ResumeBackend: not found")
|
|
}
|
|
if b.Health.State != health.StateUnknown {
|
|
t.Errorf("after resume: %s", b.Health.State)
|
|
}
|
|
}
|