LB buckets column + health cascade; VPP dump fix; maglevc strictness
SPA (cmd/frontend/web): - New "lb buckets" column backed by a 1s-debounced GetVPPLBState fetch loop with leading+trailing edge coalesce. - Per-frontend health icon (✅/⚠️/❗/‼️/❓) in the Zippy header, gated by a settling flag that suppresses ‼️ until the next lb-state reconciliation after a backend transition or weight change. - In-place leaf merge on lb-state so stable bucket values (e.g. "0") don't retrigger the Flash animation on every refresh. - Zippy cards remember open state in a cookie, default closed on fresh load; fixed-width frontend-title-name + reserved icon slot so headers line up across all cards. - Clock-drift watchdog in sse.ts that forces a fresh EventSource on laptop-wake so the broker emits a resync instead of hanging on a dead half-open socket. Frontend service (cmd/frontend): - maglevClient.lbStateLoop, trigger on backend transitions + vpp-connect, best-effort fetch on refreshAll. - Admin handlers explicitly wake the lb-state loop after lifecycle ops and set-weight (the latter emits no transition event on the maglevd side, so the WatchEvents path wouldn't have caught it). - /favicon.ico served from embedded web/public IPng logo. VPP integration: - internal/vpp/lbstate.go: dumpASesForVIP drops Pfx from the dump request (setting it silently wipes IPv4 replies in the LB plugin) and filters results by prefix on the response side instead, which also demuxes multi-VIP-on-same-port cases correctly. maglevc: - Walk now returns the unconsumed token tail; dispatch and the question listener reject unknown commands with a targeted error instead of dumping the full command tree prefixed with garbage. - On '?', echo the current line (including the '?') before the help list so the output reads like birdc. Checker / prober: - internal/checker: ±10% jitter on NextInterval so probes across restart don't all fire on the same tick. - internal/prober: HTTP User-Agent now carries the build version and project URL. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,8 @@ import { createStore, produce } from "solid-js/store";
|
||||
import type {
|
||||
BackendEventPayload,
|
||||
FrontendEventPayload,
|
||||
FrontendSnapshot,
|
||||
LBStatePayload,
|
||||
MaglevdStatusPayload,
|
||||
StateSnapshot,
|
||||
TransitionRecord,
|
||||
@@ -83,14 +85,81 @@ function recomputeDerivedState(snap: StateSnapshot) {
|
||||
|
||||
// FrontendState keys snapshots by maglevd name. A single store drives the
|
||||
// whole UI; reducers produce() into the right branch.
|
||||
//
|
||||
// settling is a per-(maglevd, frontend) flag flipped to true on any
// event that changes which backends should be serving — backend
// transitions, configured weight edits. It is cleared as soon as the
// next lb-state reconciliation for that maglevd lands, or after a
// fixed grace window if no reconciliation arrives. While true,
// frontendHealth suppresses the bug-buckets verdict so a transient
// race between the new control-plane state and the lagging
// GetVPPLBState refetch doesn't flash the ‼️ icon. A real, persistent
// dataplane disagreement still shows up once the flag is cleared.
|
||||
export type FrontendState = {
  // Latest snapshot per maglevd, looked up by maglevd name.
  byName: { [maglevd: string]: StateSnapshot };
  // Settling flags: maglevd name → frontend name → true. A missing
  // entry means "not settling"; entries are deleted rather than set
  // to false.
  settling: { [maglevd: string]: { [frontend: string]: true } };
};
|
||||
|
||||
// The single reactive store. It starts with no snapshots and no
// settling flags; both branches must be present because FrontendState
// declares them as required.
const [state, setState] = createStore<FrontendState>({ byName: {}, settling: {} });

export { state };

// Delay, in milliseconds, after which a settling flag is cleared by
// its timer if nothing cleared it earlier.
const SETTLE_GRACE_MS = 2000;

// Outside-the-store map of pending auto-clear timers, keyed by
// (maglevd, frontend) via settleKey. Timer ids aren't UI state so they
// don't belong in the reactive store; keeping them in a plain Map lets
// a fresh transition cancel and restart the timer cleanly.
const settlingTimers = new Map<string, ReturnType<typeof setTimeout>>();
|
||||
// settleKey builds the settlingTimers key for one (maglevd, frontend)
// pair: the two names joined by a NUL character.
function settleKey(m: string, f: string): string {
  return [m, f].join("\x00");
}
|
||||
|
||||
// markFrontendSettling raises the settling flag for one
// (maglevd, frontend) pair and (re)arms the timer that lowers it
// again after SETTLE_GRACE_MS. Calling it while a timer is already
// pending for the same pair cancels that timer first, so the grace
// window always counts from the most recent call.
function markFrontendSettling(maglevd: string, frontend: string) {
  setState(
    produce((draft) => {
      // Create the per-maglevd row on first use. The flag itself is
      // written through the draft so the store sees the change.
      if (!draft.settling[maglevd]) {
        draft.settling[maglevd] = {};
      }
      draft.settling[maglevd][frontend] = true;
    }),
  );

  const timerKey = settleKey(maglevd, frontend);

  // Cancel the previous countdown for this pair, if any.
  const pending = settlingTimers.get(timerKey);
  if (pending) {
    clearTimeout(pending);
  }

  // Runs when the grace window elapses: forget the timer and drop the flag.
  const expire = () => {
    settlingTimers.delete(timerKey);
    setState(
      produce((draft) => {
        if (draft.settling[maglevd]) {
          delete draft.settling[maglevd][frontend];
        }
      }),
    );
  };

  settlingTimers.set(timerKey, setTimeout(expire, SETTLE_GRACE_MS));
}
|
||||
|
||||
// clearMaglevdSettling drops every settling flag for one maglevd and
// cancels the matching auto-clear timers. applyLBState calls it once
// it has processed an lb-state event: the dataplane data is then at
// least as new as whatever change started the wait, so any remaining
// bug-buckets discrepancy is real and worth surfacing.
//
// The timer armed by markFrontendSettling is only the fallback for
// the case where no lb-state event ever arrives (VPP disconnected,
// fetch failing); without it the flag would stay set and keep
// suppressing real bugs.
function clearMaglevdSettling(maglevd: string) {
  // Timer keys are "<maglevd>\x00<frontend>", so this prefix selects
  // exactly the timers that belong to this maglevd.
  const prefix = `${maglevd}\x00`;
  settlingTimers.forEach((timer, key) => {
    if (!key.startsWith(prefix)) return;
    clearTimeout(timer);
    settlingTimers.delete(key);
  });

  setState(
    produce((draft) => {
      // Reset the row only if one exists; never create one here.
      if (draft.settling[maglevd]) {
        draft.settling[maglevd] = {};
      }
    }),
  );
}
|
||||
|
||||
export function replaceSnapshot(snap: StateSnapshot) {
|
||||
// Recompute effective weights + aggregate frontend state locally
|
||||
// from the snapshot's backends array, rather than trusting the
|
||||
@@ -146,6 +215,17 @@ export function applyBackendTransition(maglevd: string, p: BackendEventPayload)
|
||||
recomputeDerivedState(snap);
|
||||
}),
|
||||
);
|
||||
// Mark every frontend that references this backend as settling so
|
||||
// the bug-buckets verdict is gated on the next fresh GetVPPLBState
|
||||
// reconciliation (or the 2s safety timer, whichever fires first).
|
||||
const snap = state.byName[maglevd];
|
||||
if (snap) {
|
||||
for (const fe of snap.frontends) {
|
||||
if (fe.pools.some((pool) => pool.backends.some((pb) => pb.name === p.backend))) {
|
||||
markFrontendSettling(maglevd, fe.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Frontend-transition events arrive from the server's checker, but
|
||||
@@ -160,6 +240,70 @@ export function applyFrontendTransition(_maglevd: string, _p: FrontendEventPaylo
|
||||
// no-op — state is derived client-side, see recomputeDerivedState
|
||||
}
|
||||
|
||||
// hasOwnKey reports whether key is an own property of obj. Unlike the
// `in` operator it ignores inherited Object.prototype members, so
// names such as "constructor" or "toString" are not mistaken for
// entries that are present.
function hasOwnKey(obj: object, key: string): boolean {
  return Object.prototype.hasOwnProperty.call(obj, key);
}

// applyLBState merges the per-frontend bucket map for one maglevd
// from a freshly-arrived "lb-state" SSE event. A null/undefined or
// empty per_frontend payload (sent on VPP disconnect or fetch failure)
// clears the cached map so the SPA renders em-dashes in the buckets
// column instead of stale numbers. Events for a maglevd with no
// snapshot in the store leave the store untouched.
//
// The merge is done leaf-by-leaf rather than via wholesale assignment.
// produce's proxy only emits a signal when a property is actually
// written, so guarding each write with `!==` keeps unchanged numbers
// (in particular every drained-to-0 backend) from invalidating their
// downstream reactive reads. Without this, the periodic refresh and
// every same-value re-fetch would re-trigger the Flash animation on
// every cell.
//
// Whatever the payload contained, the maglevd's settling flags are
// cleared afterwards.
export function applyLBState(maglevd: string, p: LBStatePayload) {
  setState(
    produce((s) => {
      const snap = s.byName[maglevd];
      if (!snap) return;
      const next = p.per_frontend;
      const empty = !next || Object.keys(next).length === 0;
      if (empty) {
        // Only write when there is something to clear, so repeated
        // empty events don't emit a signal each time.
        if (snap.lb_state !== undefined) snap.lb_state = undefined;
        return;
      }
      if (!snap.lb_state) {
        snap.lb_state = { per_frontend: {} };
      }
      const cur = snap.lb_state.per_frontend;
      // Update / insert leaves that actually changed.
      for (const fe of Object.keys(next)) {
        if (!cur[fe]) cur[fe] = {};
        // Re-read through the draft after the insert above so later
        // writes go through the proxy rather than the raw object.
        const curRow = cur[fe];
        const nextRow = next[fe];
        for (const be of Object.keys(nextRow)) {
          if (curRow[be] !== nextRow[be]) curRow[be] = nextRow[be];
        }
        // Drop backends that disappeared from this frontend. Use an
        // own-property test: `in` would also match inherited keys and
        // leave a stale entry behind.
        for (const be of Object.keys(curRow)) {
          if (!hasOwnKey(nextRow, be)) delete curRow[be];
        }
      }
      // Drop frontends that disappeared from the new snapshot.
      for (const fe of Object.keys(cur)) {
        if (!hasOwnKey(next, fe)) delete cur[fe];
      }
    }),
  );
  // A fresh lb-state event means the dataplane data is now at least
  // as new as anything we were waiting on — re-enable bug detection.
  clearMaglevdSettling(maglevd);
}
|
||||
|
||||
// lbBucketsFor returns the bucket count stored in the snapshot's LB
// state for one backend of one frontend. The result is undefined when
// any step of the lookup is missing: no snapshot, no lb_state on it
// (VPP disconnected, no fetch yet), no row for the frontend, or no
// entry for the backend. The view renders an em-dash in all of those
// cases.
export function lbBucketsFor(
  snap: StateSnapshot | undefined,
  frontend: string,
  backend: string,
): number | undefined {
  const perFrontend = snap?.lb_state?.per_frontend;
  const row = perFrontend?.[frontend];
  return row?.[backend];
}
|
||||
|
||||
export function applyVPPStatus(maglevd: string, state: string) {
|
||||
setState(
|
||||
produce((s) => {
|
||||
@@ -211,6 +355,89 @@ export function applyConfiguredWeight(
|
||||
recomputeDerivedState(snap);
|
||||
}),
|
||||
);
|
||||
markFrontendSettling(maglevd, frontend);
|
||||
}
|
||||
|
||||
// FrontendHealth is the per-frontend "is everything actually working"
// verdict that frontendHealth derives from backend states, weights,
// and (when available) the VPP bucket map. The verdicts, as
// frontendHealth tests them:
//
//   "ok"              → every backend is 'up', the primary pool has
//                       a backend with weight>0, and no dataplane
//                       disagreement was detected
//   "bug-buckets"     → some backend with effective_weight>0 has no
//                       buckets (missing or 0) in VPP — control plane
//                       and data plane disagree, almost always a bug
//   "primary-drained" → the primary pool is not serving any traffic
//                       (every backend in pool[0] has eff=0, or there
//                       is no pool[0]); the frontend is on its
//                       fallback or fully down
//   "degraded"        → at least one backend isn't 'up' but nothing
//                       worse — typical maintenance / outage state
//   "unknown"         → fallthrough; should be unreachable, kept as
//                       a safety net for logic bugs in frontendHealth
export type FrontendHealth = "ok" | "bug-buckets" | "primary-drained" | "degraded" | "unknown";
|
||||
|
||||
// frontendHealth computes the FrontendHealth verdict for one frontend
// of one maglevd snapshot.
//
// snap supplies the backend states and (optionally) the VPP bucket
// map; fe supplies the pools whose backends are judged. The function
// also reads the frontend's settling flag from the store, so it is
// reactive to that flag as well as to its arguments.
//
// Verdicts are tested in this order: "ok", "bug-buckets",
// "primary-drained", "degraded", and finally "unknown".
export function frontendHealth(snap: StateSnapshot, fe: FrontendSnapshot): FrontendHealth {
  // Backend name → state. A Map rather than a plain object so that no
  // backend name can collide with an inherited Object.prototype key
  // (assigning to "__proto__" on a plain object stores nothing).
  const stateOf = new Map<string, string>();
  for (const b of snap.backends) stateOf.set(b.name, b.state);

  // The bucket check is only meaningful when we actually have an LB
  // state snapshot. While lb_state is undefined (fresh page load, VPP
  // disconnected) the check is skipped and the verdict rests on the
  // control-plane data alone.
  const lbAvailable = !!snap.lb_state;
  const feBuckets = snap.lb_state?.per_frontend?.[fe.name];

  // Reactive read of the per-frontend settling flag. While it is set
  // we are still waiting for the next GetVPPLBState reconciliation
  // after a recent control-plane change, so a "weight>0 but
  // buckets==0" observation is most likely a race and is ignored.
  const settling = !!state.settling[snap.maglevd.name]?.[fe.name];

  let anyDown = false;
  let dataplaneBug = false;
  for (const pool of fe.pools) {
    for (const pb of pool.backends) {
      // A backend missing from snap.backends counts as not up.
      if (stateOf.get(pb.name) !== "up") anyDown = true;
      if (!settling && lbAvailable && pb.effective_weight > 0) {
        // Control plane says "serve"; VPP must route at least one
        // bucket to this backend. Missing and 0 are both failures.
        const b = feBuckets?.[pb.name];
        if (b === undefined || b === 0) dataplaneBug = true;
      }
    }
  }

  // pool[0] is treated as the primary pool.
  const primary = fe.pools[0];
  const primaryHasWeights = !!primary && primary.backends.some((pb) => pb.weight > 0);
  const primaryAllZero = !primary || primary.backends.every((pb) => pb.effective_weight === 0);

  if (!anyDown && primaryHasWeights && !dataplaneBug) return "ok";
  if (dataplaneBug) return "bug-buckets";
  if (primaryAllZero) return "primary-drained";
  if (anyDown) return "degraded";
  return "unknown";
}
|
||||
|
||||
// Icon shown for each FrontendHealth verdict.
const FRONTEND_HEALTH_ICONS: Record<FrontendHealth, string> = {
  ok: "✅",
  "bug-buckets": "‼️",
  "primary-drained": "❗",
  degraded: "⚠️",
  unknown: "❓",
};

// frontendHealthIcon returns the icon for the frontend's current
// frontendHealth verdict.
export function frontendHealthIcon(snap: StateSnapshot, fe: FrontendSnapshot): string {
  const verdict = frontendHealth(snap, fe);
  return FRONTEND_HEALTH_ICONS[verdict];
}
|
||||
|
||||
// Helpers used by views.
|
||||
|
||||
Reference in New Issue
Block a user