LB buckets column + health cascade; VPP dump fix; maglevc strictness

SPA (cmd/frontend/web):
- New "lb buckets" column backed by a 1s-debounced GetVPPLBState
  fetch loop with leading+trailing edge coalesce.
- Per-frontend health icon (e.g. ⚠️/‼️) in the Zippy header,
  gated by a settling flag that suppresses ‼️ until the next lb-state
  reconciliation after a backend transition or weight change.
- In-place leaf merge on lb-state so stable bucket values (e.g. "0")
  don't retrigger the Flash animation on every refresh.
- Zippy cards remember their open state in a cookie, defaulting to
  closed on a fresh load; fixed-width frontend-title-name + reserved
  icon slot so headers line up across all cards.
- Clock-drift watchdog in sse.ts that forces a fresh EventSource on
  laptop-wake so the broker emits a resync instead of hanging on a
  dead half-open socket.
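
For illustration, here is a minimal sketch of the leading+trailing
debounce behind the lb-buckets fetch loop, translated to Go (the real
code is TypeScript in cmd/frontend/web; the debouncer type and the
demo below are illustrative, not the actual SPA helpers):

  package main

  import (
      "fmt"
      "sync"
      "time"
  )

  // debouncer coalesces bursts of triggers: the first trigger in a
  // quiet period runs fn immediately (leading edge), and anything
  // arriving within the window folds into one trailing-edge run.
  type debouncer struct {
      mu      sync.Mutex
      window  time.Duration
      fn      func()
      timer   *time.Timer
      pending bool
  }

  func (d *debouncer) Trigger() {
      d.mu.Lock()
      defer d.mu.Unlock()
      if d.timer == nil { // quiet: fire the leading edge now
          d.fn()
          d.timer = time.AfterFunc(d.window, d.settle)
          return
      }
      d.pending = true // inside the window: coalesce
  }

  func (d *debouncer) settle() {
      d.mu.Lock()
      defer d.mu.Unlock()
      if d.pending { // triggers arrived during the window
          d.pending = false
          d.fn() // trailing edge, once per burst
          d.timer.Reset(d.window)
          return
      }
      d.timer = nil // quiet again; next trigger fires immediately
  }

  func main() {
      d := &debouncer{window: time.Second, fn: func() { fmt.Println("GetVPPLBState") }}
      for i := 0; i < 5; i++ {
          d.Trigger() // a burst yields one leading + one trailing fetch
      }
      time.Sleep(3 * time.Second)
  }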

Frontend service (cmd/frontend):
- New maglevClient.lbStateLoop, triggered on backend transitions +
  vpp-connect, with a best-effort fetch on refreshAll (see the sketch
  after this list).
- Admin handlers explicitly wake the lb-state loop after lifecycle
  ops and set-weight (the latter emits no transition event on the
  maglevd side, so the WatchEvents path wouldn't have caught it).
- /favicon.ico served from embedded web/public IPng logo.
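
A rough sketch of that wake-up plumbing, assuming a buffered wake
channel (the names lbStateLoop, Wake, and refreshLBState here are
illustrative; the real cmd/frontend wiring may differ):

  package main

  import (
      "context"
      "log/slog"
      "time"
  )

  type maglevClient struct {
      wake chan struct{} // buffered, capacity 1
  }

  // lbStateLoop refetches lb-state whenever someone kicks the wake
  // channel (backend transitions, vpp-connect, admin lifecycle ops,
  // set-weight), with a periodic safety-net refresh in between.
  func (c *maglevClient) lbStateLoop(ctx context.Context) {
      for {
          select {
          case <-ctx.Done():
              return
          case <-c.wake:
          case <-time.After(time.Minute): // assumed fallback period
          }
          c.refreshLBState(ctx)
      }
  }

  // Wake never blocks: a full buffer means a refresh is already
  // pending, so admin handlers can kick the loop freely. This covers
  // set-weight, which emits no transition event for WatchEvents to see.
  func (c *maglevClient) Wake() {
      select {
      case c.wake <- struct{}{}:
      default:
      }
  }

  func (c *maglevClient) refreshLBState(ctx context.Context) {
      slog.Info("would call GetVPPLBState here") // placeholder for the real fetch
      _ = ctx
  }

  func main() {
      c := &maglevClient{wake: make(chan struct{}, 1)}
      go c.lbStateLoop(context.Background())
      c.Wake() // e.g. from a set-weight admin handler
      time.Sleep(100 * time.Millisecond)
  }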

VPP integration:
- internal/vpp/lbstate.go: dumpASesForVIP drops Pfx from the dump
  request (setting it silently wipes IPv4 replies in the LB plugin)
  and filters results by prefix on the response side instead, which
  also demuxes multi-VIP-on-same-port cases correctly.

maglevc:
- Walk now returns the unconsumed token tail; dispatch and the
  question listener reject unknown commands with a targeted error
  instead of dumping the full command tree prefixed with garbage
  (sketched after this list).
- On '?', echo the current line (including the '?') before the help
  list so the output reads like birdc.
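
A rough sketch of the stricter dispatch, under the assumption that the
command tree is walked token by token (Node, Walk, and dispatch are
illustrative shapes, not the exact maglevc types):

  package main

  import (
      "fmt"
      "strings"
  )

  // Node is one vertex in the command tree; leaves carry a run func.
  type Node struct {
      children map[string]*Node
      run      func(args []string) error
  }

  // Walk consumes tokens that match child names and returns the
  // deepest node reached together with the unconsumed tail.
  func (n *Node) Walk(tokens []string) (*Node, []string) {
      for len(tokens) > 0 {
          child, ok := n.children[tokens[0]]
          if !ok {
              break
          }
          n, tokens = child, tokens[1:]
      }
      return n, tokens
  }

  // dispatch rejects input that never reached a runnable node with a
  // targeted error naming the offending tail, instead of dumping the
  // whole command tree.
  func dispatch(root *Node, line string) error {
      node, rest := root.Walk(strings.Fields(line))
      if node.run == nil {
          return fmt.Errorf("unknown command: %q", strings.Join(rest, " "))
      }
      return node.run(rest) // leftover tokens become the command's args
  }

  func main() {
      root := &Node{children: map[string]*Node{
          "show": {children: map[string]*Node{
              "lb": {run: func(args []string) error { fmt.Println("lb state", args); return nil }},
          }},
      }}
      fmt.Println(dispatch(root, "show lb buckets")) // ok, args = [buckets]
      fmt.Println(dispatch(root, "show foo"))        // unknown command: "foo"
  }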

Checker / prober:
- internal/checker: ±10% jitter on NextInterval so probes across
  restart don't all fire on the same tick.
- internal/prober: HTTP User-Agent now carries the build version
  and project URL.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

internal/checker:

@@ -6,6 +6,7 @@ import (
"context"
"fmt"
"log/slog"
"math/rand/v2"
"net"
"sort"
"sync"
@@ -586,7 +587,7 @@ func (c *Checker) runProbe(ctx context.Context, name string, pos, total int) {
 			sleepFor = 30 * time.Second
 		}
 	} else {
-		sleepFor = w.backend.NextInterval(hc.Interval, hc.FastInterval, hc.DownInterval)
+		sleepFor = jitterInterval(w.backend.NextInterval(hc.Interval, hc.FastInterval, hc.DownInterval))
 	}
 	c.mu.RUnlock()
@@ -827,3 +828,14 @@ func staggerDelay(interval time.Duration, pos, total int) time.Duration {
 	}
 	return time.Duration(int64(interval) * int64(pos) / int64(total))
 }
+
+// jitterInterval scales d by a uniformly-random factor in [0.9, 1.1) so that
+// probe schedules across many backends drift apart instead of all firing on
+// the same tick after process start (or after a config reload re-staggers them
+// onto identical phases).
+func jitterInterval(d time.Duration) time.Duration {
+	if d <= 0 {
+		return d
+	}
+	return time.Duration(float64(d) * (0.9 + 0.2*rand.Float64()))
+}

internal/prober:

@@ -12,9 +12,12 @@ import (
"strconv"
"strings"
buildinfo "git.ipng.ch/ipng/vpp-maglev/cmd"
"git.ipng.ch/ipng/vpp-maglev/internal/health"
)
var userAgent = "maglev-healthchecker/" + buildinfo.Version() + " (+https://git.ipng.ch/ipng/vpp-maglev)"
// HTTPProbe sends a plain HTTP GET to cfg.Target inside the healthcheck netns.
func HTTPProbe(ctx context.Context, cfg ProbeConfig) health.ProbeResult {
return doHTTPProbe(ctx, cfg, false)
@@ -110,7 +113,7 @@ func doHTTPProbe(ctx context.Context, cfg ProbeConfig, useTLS bool) health.Probe
 		return health.ProbeResult{OK: false, Layer: health.LayerL7, Code: "L7RSP", Detail: err.Error()}
 	}
 	req.Host = hostHeader
-	req.Header.Set("User-Agent", "maglev-healthchecker/1.0")
+	req.Header.Set("User-Agent", userAgent)
 	resp, err := client.Do(req)
 	if err != nil {

internal/vpp/lbstate.go:

@@ -79,7 +79,7 @@ func (c *Client) GetLBStateAll() (*LBState, error) {
 		return nil, err
 	}
 	for i := range vips {
-		ases, err := dumpASesForVIP(ch, vips[i].Protocol, vips[i].Port)
+		ases, err := dumpASesForVIP(ch, vips[i].Prefix, vips[i].Protocol, vips[i].Port)
 		if err != nil {
 			return nil, err
 		}
@@ -159,7 +159,7 @@ func lookupVIP(ch *loggedChannel, prefix *net.IPNet, protocol uint8, port uint16
 		if all[i].Protocol != protocol || all[i].Port != port {
 			continue
 		}
-		ases, err := dumpASesForVIP(ch, protocol, port)
+		ases, err := dumpASesForVIP(ch, all[i].Prefix, protocol, port)
 		if err != nil {
 			return nil, err
 		}
@@ -170,13 +170,22 @@ func lookupVIP(ch *loggedChannel, prefix *net.IPNet, protocol uint8, port uint16
 }
 
 // dumpASesForVIP returns the application servers bound to the VIP identified
-// by (protocol, port). VPP's lb_as_v2_dump filter is used; we also guard
-// defensively against replies for other VIPs.
-func dumpASesForVIP(ch *loggedChannel, protocol uint8, port uint16) ([]LBAS, error) {
+// by (prefix, protocol, port).
+//
+// VPP's lb_as_v2_dump does not honour the request's Pfx field — the LB
+// plugin only filters on (protocol, port), so a single dump call returns
+// ASes for every VIP sharing that proto+port pair (e.g. an IPv4 and an
+// IPv6 VIP both listening on TCP/80). We do the prefix filter in Go on
+// the response side. Earlier we tried setting Pfx in the request as
+// well; for reasons unknown that silently dropped every IPv4 reply, so
+// this code intentionally leaves Pfx zero and relies entirely on
+// post-filtering.
+func dumpASesForVIP(ch *loggedChannel, prefix *net.IPNet, protocol uint8, port uint16) ([]LBAS, error) {
 	req := &lb.LbAsV2Dump{
 		Protocol: protocol,
 		Port:     port,
 	}
+	want := prefix.String()
 	reqCtx := ch.SendMultiRequest(req)
 	var out []LBAS
 	for {
@@ -191,6 +200,9 @@ func dumpASesForVIP(ch *loggedChannel, protocol uint8, port uint16) ([]LBAS, err
 		if reply.Vip.Port != port || uint8(reply.Vip.Protocol) != protocol {
 			continue
 		}
+		if lbVipPrefix(reply.Vip).String() != want {
+			continue
+		}
 		var inUse time.Time
 		if reply.InUseSince != 0 {
 			inUse = time.Unix(int64(reply.InUseSince), 0)