New feature: per-VIP / per-backend runtime counters
* New GetVPPLBCounters RPC serving an in-process snapshot refreshed
by a 5s scrape loop (internal/vpp/lbstats.go). Each cycle pulls
the LB plugin's four SimpleCounters (next, first, untracked,
no-server) plus the FIB /net/route/to CombinedCounter for every
VIP and every backend host prefix via a single DumpStats call.
* FIB stats-index discovery via ip_route_lookup (internal/vpp/
fibstats.go); per-worker reduction happens in the collector.
* Prometheus collector exports vip_packets_total (kind label),
vip_route_{packets,bytes}_total, and backend_route_{packets,
bytes}_total. Metrics source interface extended with VIPStats /
  BackendRouteStats; vpp.Client publishes snapshots via
  atomic.Pointer and clears them on disconnect (see the sketch
  after this list).
* New 'show vpp lb counters' CLI command. The 'show vpp lbstate'
and 'sync vpp lbstate' commands are restructured under 'show
vpp lb {state,counters}' / 'sync vpp lb state' to make room
for the new verb.
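A minimal sketch of the scrape/publish/export pipeline described
above. All names below (CounterSnapshot, Client, reduceWorkers,
lbCollector) are illustrative stand-ins rather than the actual
identifiers in internal/vpp/lbstats.go and the collector, and only
vip_packets_total of the exported metrics is shown:

package lbstatssketch

import (
    "sync/atomic"
    "time"

    "github.com/prometheus/client_golang/prometheus"
)

// CounterSnapshot is one immutable scrape result.
type CounterSnapshot struct {
    TakenAt    time.Time
    VIPPackets map[string]map[string]uint64 // vip -> kind -> packets
}

// Client publishes the latest snapshot lock-free: readers never
// block the scrape loop, and a nil pointer means "disconnected".
type Client struct {
    counters atomic.Pointer[CounterSnapshot]
}

// reduceWorkers sums one counter across the per-worker vectors the
// stats segment exposes.
func reduceWorkers(perWorker [][]uint64, index int) uint64 {
    var total uint64
    for _, w := range perWorker {
        if index < len(w) {
            total += w[index]
        }
    }
    return total
}

func (c *Client) scrapeLoop(stop <-chan struct{}) {
    t := time.NewTicker(5 * time.Second) // the 5s cycle from the notes
    defer t.Stop()
    for {
        select {
        case <-stop:
            c.counters.Store(nil) // cleared on disconnect
            return
        case <-t.C:
            snap := &CounterSnapshot{TakenAt: time.Now()}
            // ... fill snap from a single stats dump ...
            c.counters.Store(snap)
        }
    }
}

// Snapshot returns the latest scrape, or nil when disconnected.
func (c *Client) Snapshot() *CounterSnapshot { return c.counters.Load() }

// lbCollector exports the snapshot as Prometheus const metrics.
type lbCollector struct {
    client  *Client
    packets *prometheus.Desc
}

func newLBCollector(c *Client) *lbCollector {
    return &lbCollector{
        client: c,
        packets: prometheus.NewDesc("vip_packets_total",
            "Per-VIP LB plugin packet counters.",
            []string{"vip", "kind"}, nil),
    }
}

func (lc *lbCollector) Describe(ch chan<- *prometheus.Desc) { ch <- lc.packets }

func (lc *lbCollector) Collect(ch chan<- prometheus.Metric) {
    snap := lc.client.Snapshot()
    if snap == nil {
        return // disconnected: export nothing rather than stale values
    }
    for vip, kinds := range snap.VIPPackets {
        for kind, n := range kinds {
            ch <- prometheus.MustNewConstMetric(lc.packets,
                prometheus.CounterValue, float64(n), vip, kind)
        }
    }
}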
New feature: src-ip-sticky frontends
* New frontend YAML key 'src-ip-sticky' (bool). Plumbed through
config.Frontend, desiredVIP, and the lb_add_del_vip_v2 call.
* Reflected in gRPC FrontendInfo.src_ip_sticky and VPPLBVIP.
src_ip_sticky, and shown in 'show vpp lb state' output.
* Scraped back from VPP by parsing 'show lb vips verbose' through
cli_inband — lb_vip_details does not expose the flag. The same
scrape also recovers the LB pool index for each VIP, which the
stats-segment counters are keyed on. This is a documented
temporary workaround until VPP ships an lb_vip_v2_dump.
* src_ip_sticky cannot be mutated on a live VIP, so a flipped flag
  triggers a tear-down-and-recreate in reconcileVIP (ASes deleted
  with flush, VIP deleted, then re-added; sketched below). The
  flip is logged.
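The recreate path has roughly the following shape. vipState,
lbClient and recreateOnStickyFlip are hypothetical stand-ins for
the real reconcileVIP plumbing, not its actual signature:

package reconcilesketch

import "log"

// vipState is an illustrative stand-in for desired/actual VIP state.
type vipState struct {
    Prefix      string
    SrcIPSticky bool
}

// lbClient is a stand-in for the slice of the VPP LB API used here.
type lbClient interface {
    DelASesWithFlush(v vipState) error // remove ASes, flush connections
    DelVIP(v vipState) error
    AddVIP(v vipState) error // lb_add_del_vip_v2 with the new flag
}

// recreateOnStickyFlip tears down and re-adds a VIP whose
// src_ip_sticky flag changed, since the flag cannot be mutated on
// a live VIP.
func recreateOnStickyFlip(lb lbClient, desired, actual vipState) error {
    if desired.SrcIPSticky == actual.SrcIPSticky {
        return nil // flag unchanged: nothing to do here
    }
    log.Printf("vip %s: src_ip_sticky %t -> %t, recreating",
        desired.Prefix, actual.SrcIPSticky, desired.SrcIPSticky)
    if err := lb.DelASesWithFlush(actual); err != nil {
        return err
    }
    if err := lb.DelVIP(actual); err != nil {
        return err
    }
    return lb.AddVIP(desired)
}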
New feature: frontend state aggregation and events
* New health.FrontendState (unknown/up/down) and FrontendTransition
  types. A frontend is 'up' iff at least one backend has a nonzero
  effective weight, 'unknown' iff no backend has real state yet,
  and 'down' otherwise (see the sketch after this list).
* Checker tracks per-frontend aggregate state, recomputing after
each backend transition and emitting a frontend-transition Event
on change. Reload drops entries for removed frontends.
* checker.Event gains an optional FrontendTransition pointer;
backend- vs. frontend-transition events are demultiplexed on
that field.
* WatchEvents now sends an initial snapshot of frontend state on
connect (mirroring the existing backend snapshot), subscribes
once to the checker stream, and fans out to backend/frontend
handlers based on the client's filter flags. The proto
FrontendEvent message grows name + transition fields.
* New Checker.FrontendState accessor.
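The aggregation rule reduces to a small pure function. A sketch
under assumed types (backendView is hypothetical); the real
implementation is health.ComputeFrontendState from the refactor
below:

package healthsketch

// FrontendState is the three-valued aggregate from the notes.
type FrontendState int

const (
    FrontendUnknown FrontendState = iota
    FrontendUp
    FrontendDown
)

// backendView is an illustrative stand-in: EffectiveWeight is zero
// for down/disabled backends, and HasState reports whether a health
// probe has produced a real result yet.
type backendView struct {
    EffectiveWeight uint32
    HasState        bool
}

// computeFrontendState: 'up' iff at least one backend has a nonzero
// effective weight, 'unknown' iff no backend has real state yet,
// 'down' otherwise.
func computeFrontendState(backends []backendView) FrontendState {
    anyState := false
    for _, b := range backends {
        if b.EffectiveWeight > 0 {
            return FrontendUp
        }
        anyState = anyState || b.HasState
    }
    if anyState {
        return FrontendDown
    }
    return FrontendUnknown
}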
Refactor: pure health helpers
* Moved the priority-failover selector and the (pool idx, active
pool, state, cfg weight) → (vpp weight, flush) mapping out of
internal/vpp/lbsync.go into a new internal/health/weights.go so
the checker can reuse them for frontend-state computation
without importing internal/vpp.
* New functions: health.ActivePoolIndex, BackendEffectiveWeight,
  EffectiveWeights, ComputeFrontendState. lbsync.go now calls
  these directly; vpp.EffectiveWeights is a thin wrapper over
  health.EffectiveWeights retained for the gRPC observability
  path. Fully unit-tested in internal/health/weights_test.go.
  The pool selector is sketched after this list.
maglevc polish
* --color default is now mode-aware: on in the interactive shell,
off in one-shot mode so piped output is script-safe. Explicit
--color=true/false still overrides.
* New stripHostMask helper drops /32 and /128 from VIP display;
  non-host prefixes pass through unchanged (sketched after this
  list).
* Counter table column order fixed (first before next) and
packets/bytes columns renamed to fib-packets/fib-bytes to
clarify they come from the FIB, not the LB plugin.
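The display helper is small enough to sketch in full; the actual
implementation may differ, but net/netip makes the host-prefix
test trivial:

package clisketch

import "net/netip"

// stripHostMask renders host prefixes (/32 for IPv4, /128 for
// IPv6) as a bare address; anything else passes through unchanged.
func stripHostMask(prefix string) string {
    p, err := netip.ParsePrefix(prefix)
    if err != nil {
        return prefix // not a prefix at all: leave it alone
    }
    if p.IsSingleIP() {
        return p.Addr().String()
    }
    return prefix
}

With this shape, "192.0.2.1/32" renders as "192.0.2.1" while an
IPv6 "2001:db8::/32" is left intact, which a naive string-suffix
check would get wrong.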
Docs
* config-guide: document src-ip-sticky, including the VIP
recreate-on-change caveat.
* user-guide, maglevc.1, maglevd.8: updated command tree, new
counters command, color defaults, and the src-ip-sticky field.
// Copyright (c) 2026, Pim van Pelt <pim@ipng.ch>

package grpcapi

import (
    "context"
    "net"
    "testing"
    "time"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"

    "git.ipng.ch/ipng/vpp-maglev/internal/checker"
    "git.ipng.ch/ipng/vpp-maglev/internal/config"
    "git.ipng.ch/ipng/vpp-maglev/internal/health"
)

func makeTestChecker(ctx context.Context) *checker.Checker {
    cfg := &config.Config{
        HealthChecker: config.HealthCheckerConfig{TransitionHistory: 5},
        HealthChecks: map[string]config.HealthCheck{
            "icmp": {
                Type:     "icmp",
                Interval: time.Hour, // long interval: probes won't fire during tests
                Timeout:  time.Second,
                Fall:     3,
                Rise:     2,
            },
        },
        Backends: map[string]config.Backend{
            "be0": {
                Address:     net.ParseIP("10.0.0.2"),
                HealthCheck: "icmp",
                Enabled:     true,
            },
        },
        Frontends: map[string]config.Frontend{
            "web": {
                Address:  net.ParseIP("192.0.2.1"),
                Protocol: "tcp",
                Port:     80,
                Pools: []config.Pool{
                    {Name: "primary", Backends: map[string]config.PoolBackend{
                        "be0": {Weight: 100},
                    }},
                },
            },
        },
    }
    c := checker.New(cfg)
    go c.Run(ctx) //nolint:errcheck
    // Allow the Run goroutine to initialize workers.
    time.Sleep(10 * time.Millisecond)
    return c
}

func startTestServer(t *testing.T, ctx context.Context, c *checker.Checker) (MaglevClient, func()) {
    t.Helper()
    lis, err := net.Listen("tcp", "127.0.0.1:0")
    if err != nil {
        t.Fatalf("listen: %v", err)
    }
    srv := grpc.NewServer()
    RegisterMaglevServer(srv, NewServer(ctx, c, nil, "", nil))
    go srv.Serve(lis) //nolint:errcheck

    conn, err := grpc.NewClient(lis.Addr().String(),
        grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        t.Fatalf("dial: %v", err)
    }
    return NewMaglevClient(conn), func() {
        conn.Close()
        srv.Stop()
    }
}

func TestListFrontends(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    resp, err := client.ListFrontends(ctx, &ListFrontendsRequest{})
    if err != nil {
        t.Fatalf("ListFrontends: %v", err)
    }
    if len(resp.FrontendNames) != 1 || resp.FrontendNames[0] != "web" {
        t.Errorf("ListFrontends: got %v, want [web]", resp.FrontendNames)
    }
}

func TestGetFrontend(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    info, err := client.GetFrontend(ctx, &GetFrontendRequest{Name: "web"})
    if err != nil {
        t.Fatalf("GetFrontend: %v", err)
    }
    if info.Address != "192.0.2.1" {
        t.Errorf("GetFrontend address: got %q, want 192.0.2.1", info.Address)
    }
    if info.Port != 80 {
        t.Errorf("GetFrontend port: got %d, want 80", info.Port)
    }
    if len(info.Pools) != 1 || info.Pools[0].Name != "primary" {
        t.Errorf("GetFrontend pools: got %v, want [{primary [be0]}]", info.Pools)
    }
    if len(info.Pools[0].Backends) != 1 || info.Pools[0].Backends[0].Name != "be0" {
        t.Errorf("GetFrontend pools[0].backends: got %v, want [{be0 100}]", info.Pools[0].Backends)
    }
    if info.Pools[0].Backends[0].Weight != 100 {
        t.Errorf("GetFrontend pools[0].backends[0].weight: got %d, want 100", info.Pools[0].Backends[0].Weight)
    }
}

func TestGetFrontendNotFound(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    _, err := client.GetFrontend(ctx, &GetFrontendRequest{Name: "nope"})
    if err == nil {
        t.Error("expected error for unknown frontend")
    }
}

func TestListBackends(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    resp, err := client.ListBackends(ctx, &ListBackendsRequest{})
    if err != nil {
        t.Fatalf("ListBackends: %v", err)
    }
    if len(resp.BackendNames) != 1 || resp.BackendNames[0] != "be0" {
        t.Errorf("ListBackends: got %v, want [be0]", resp.BackendNames)
    }
}

func TestGetBackend(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    info, err := client.GetBackend(ctx, &GetBackendRequest{Name: "be0"})
    if err != nil {
        t.Fatalf("GetBackend: %v", err)
    }
    if info.State != health.StateUnknown.String() {
        t.Errorf("initial state: got %q, want unknown", info.State)
    }
    if !info.Enabled {
        t.Error("expected enabled=true")
    }
    if info.Healthcheck != "icmp" {
        t.Errorf("healthcheck: got %q, want icmp", info.Healthcheck)
    }
}

func TestGetBackendNotFound(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    _, err := client.GetBackend(ctx, &GetBackendRequest{Name: "nope"})
    if err == nil {
        t.Error("expected error for unknown backend")
    }
}

func TestPauseResumeBackend(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    info, err := client.PauseBackend(ctx, &BackendRequest{Name: "be0"})
    if err != nil {
        t.Fatalf("PauseBackend: %v", err)
    }
    if info.State != health.StatePaused.String() {
        t.Errorf("after pause: got %q, want paused", info.State)
    }

    info, err = client.ResumeBackend(ctx, &BackendRequest{Name: "be0"})
    if err != nil {
        t.Fatalf("ResumeBackend: %v", err)
    }
    if info.State != health.StateUnknown.String() {
        t.Errorf("after resume: got %q, want unknown", info.State)
    }
}

func TestSetFrontendPoolBackendWeight(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    info, err := client.SetFrontendPoolBackendWeight(ctx, &SetWeightRequest{
        Frontend: "web",
        Pool:     "primary",
        Backend:  "be0",
        Weight:   42,
    })
    if err != nil {
        t.Fatalf("SetFrontendPoolBackendWeight: %v", err)
    }
    if len(info.Pools) == 0 || len(info.Pools[0].Backends) == 0 {
        t.Fatal("response missing pools/backends")
    }
    if info.Pools[0].Backends[0].Weight != 42 {
        t.Errorf("weight: got %d, want 42", info.Pools[0].Backends[0].Weight)
    }

    // Invalid weight.
    _, err = client.SetFrontendPoolBackendWeight(ctx, &SetWeightRequest{
        Frontend: "web", Pool: "primary", Backend: "be0", Weight: 101,
    })
    if err == nil {
        t.Error("expected error for weight 101")
    }

    // Unknown frontend.
    _, err = client.SetFrontendPoolBackendWeight(ctx, &SetWeightRequest{
        Frontend: "nope", Pool: "primary", Backend: "be0", Weight: 50,
    })
    if err == nil {
        t.Error("expected error for unknown frontend")
    }
}

func TestEnableDisableBackend(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    info, err := client.DisableBackend(ctx, &BackendRequest{Name: "be0"})
    if err != nil {
        t.Fatalf("DisableBackend: %v", err)
    }
    if info.State != "disabled" {
        t.Errorf("after disable: got %q, want disabled", info.State)
    }
    if info.Enabled {
        t.Error("after disable: Enabled should be false")
    }

    info, err = client.EnableBackend(ctx, &BackendRequest{Name: "be0"})
    if err != nil {
        t.Fatalf("EnableBackend: %v", err)
    }
    if info.State != "unknown" {
        t.Errorf("after enable: got %q, want unknown", info.State)
    }
    if !info.Enabled {
        t.Error("after enable: Enabled should be true")
    }
}

func TestListHealthChecks(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    resp, err := client.ListHealthChecks(ctx, &ListHealthChecksRequest{})
    if err != nil {
        t.Fatalf("ListHealthChecks: %v", err)
    }
    if len(resp.Names) != 1 || resp.Names[0] != "icmp" {
        t.Errorf("ListHealthChecks: got %v, want [icmp]", resp.Names)
    }
}

func TestGetHealthCheck(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    info, err := client.GetHealthCheck(ctx, &GetHealthCheckRequest{Name: "icmp"})
    if err != nil {
        t.Fatalf("GetHealthCheck: %v", err)
    }
    if info.Type != "icmp" {
        t.Errorf("type: got %q, want icmp", info.Type)
    }
    if info.Fall != 3 || info.Rise != 2 {
        t.Errorf("fall/rise: got %d/%d, want 3/2", info.Fall, info.Rise)
    }
}

func TestGetHealthCheckNotFound(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    _, err := client.GetHealthCheck(ctx, &GetHealthCheckRequest{Name: "nope"})
    if err == nil {
        t.Error("expected error for unknown healthcheck")
    }
}

func TestWatchEventsServerShutdown(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)

    // Use a separate server context so we can cancel it independently.
    srvCtx, srvCancel := context.WithCancel(ctx)
    client, cleanup := startTestServer(t, srvCtx, c)
    defer cleanup()

    stream, err := client.WatchEvents(ctx, &WatchRequest{})
    if err != nil {
        t.Fatalf("WatchEvents: %v", err)
    }
    // Drain the initial synthetic snapshots (one per backend, one per frontend).
    for i := 0; i < 2; i++ {
        if _, err := stream.Recv(); err != nil {
            t.Fatalf("initial Recv %d: %v", i, err)
        }
    }

    // Cancel the server context; the stream must terminate.
    srvCancel()
    _, err = stream.Recv()
    if err == nil {
        t.Fatal("expected stream to close after server shutdown, got nil error")
    }
}

func TestWatchEventsBackend(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    stream, err := client.WatchEvents(ctx, &WatchRequest{})
    if err != nil {
        t.Fatalf("WatchEvents: %v", err)
    }

    // Should receive the current state for be0 immediately as a BackendEvent.
    ev, err := stream.Recv()
    if err != nil {
        t.Fatalf("Recv: %v", err)
    }
    be, ok := ev.Event.(*Event_Backend)
    if !ok {
        t.Fatalf("expected BackendEvent, got %T", ev.Event)
    }
    if be.Backend.BackendName != "be0" {
        t.Errorf("initial event: backend=%q, want be0", be.Backend.BackendName)
    }
}

func TestWatchEventsLogOnly(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    f := false
    stream, err := client.WatchEvents(ctx, &WatchRequest{Backend: &f, Frontend: &f})
    if err != nil {
        t.Fatalf("WatchEvents: %v", err)
    }

    // No initial snapshot should arrive (backend and frontend events are
    // filtered out by the request). Verify by checking that the stream has
    // no immediately-readable event.
    recvCh := make(chan *Event, 1)
    go func() {
        ev, _ := stream.Recv()
        recvCh <- ev
    }()
    select {
    case ev := <-recvCh:
        if _, isLog := ev.Event.(*Event_Log); !isLog {
            t.Errorf("expected only LogEvents, got %T", ev.Event)
        }
    case <-time.After(50 * time.Millisecond):
        // expected: no backend snapshot arrived
    }
}

func TestWatchEventsInvalidLogLevel(t *testing.T) {
    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    c := makeTestChecker(ctx)
    client, cleanup := startTestServer(t, ctx, c)
    defer cleanup()

    // For streaming RPCs the server error arrives on the first Recv, not on
    // the initial call.
    stream, err := client.WatchEvents(ctx, &WatchRequest{LogLevel: "verbose"})
    if err != nil {
        t.Fatalf("WatchEvents: %v", err)
    }
    _, err = stream.Recv()
    if err == nil {
        t.Fatal("expected error for invalid log_level")
    }
}