// SPDX-License-Identifier: Apache-2.0
import { createStore, produce } from "solid-js/store";
import type {
  BackendEventPayload,
  FrontendEventPayload,
  FrontendSnapshot,
  LBStatePayload,
  MaglevdStatusPayload,
  StateSnapshot,
  TransitionRecord,
} from "../types";
import { tick } from "./tick";

// recomputeDerivedState mirrors the server-side
// health.EffectiveWeights / ActivePoolIndex / ComputeFrontendState
// logic so the SPA can keep pool.effective_weight AND the
// per-frontend aggregate state correct the moment any backend
// transitions or any configured weight changes, without waiting for
// the 30s refresh. Walking every frontend is cheap — O(frontends ×
// pools × backends-per-pool) with tiny constants — and it's
// strictly a function of the backend state map + configured
// weights, so there's no risk of drift vs. the server as long as
// the rules stay identical. The SPA is the authoritative source of
// truth for *display* state: the server's cached frontendStates
// field can be stale (e.g. after a SetFrontendPoolBackendWeight
// call that doesn't re-run updateFrontendState, or after a long-
// lived WatchEvents stream where a past transition corrupted the
// client's cache) and the SPA recomputes from its own live
// backends array to avoid inheriting any staleness.
//
// Effective weight rule: a backend gets its configured pool weight
// iff it is up AND belongs to the currently-active pool; everything
// else is 0. The active pool is the first pool containing a backend
// that is both up AND has a non-zero configured weight — a pool
// whose up backends are all weight=0 contributes no serving
// capacity and gets skipped over in priority failover. Kept in
// lock-step with internal/health/weights.go ActivePoolIndex.
//
// Frontend state rule: unknown if no backends or every referenced
// backend is still in StateUnknown; up if any backend in any pool
// has effective_weight > 0; otherwise down.
Kept in lock-step with // internal/health/weights.go ComputeFrontendState. function recomputeDerivedState(snap: StateSnapshot) { const stateOf: Record = {}; for (const b of snap.backends) stateOf[b.name] = b.state; for (const fe of snap.frontends) { let activePool = 0; for (let i = 0; i < fe.pools.length; i++) { let anyServing = false; for (const pb of fe.pools[i].backends) { if (stateOf[pb.name] === "up" && pb.weight > 0) { anyServing = true; break; } } if (anyServing) { activePool = i; break; } } let anyEffective = false; let seenAny = false; let allUnknown = true; const seen = new Set(); for (let i = 0; i < fe.pools.length; i++) { for (const pb of fe.pools[i].backends) { const st = stateOf[pb.name]; pb.effective_weight = st === "up" && i === activePool ? pb.weight : 0; if (pb.effective_weight > 0) anyEffective = true; if (!seen.has(pb.name)) { seen.add(pb.name); seenAny = true; if (st !== "unknown") allUnknown = false; } } } if (!seenAny || allUnknown) { fe.state = "unknown"; } else if (anyEffective) { fe.state = "up"; } else { fe.state = "down"; } } } // FrontendState keys snapshots by maglevd name. A single store drives the // whole UI; reducers produce() into the right branch. // // settling is a per-(maglevd, frontend) flag flipped to true on any // event that changes which backends should be serving — backend // transitions, configured weight edits — and auto-cleared after a // fixed grace window. While true, frontendHealth suppresses the // bug-buckets verdict so a transient race between the new control- // plane state and the lagging GetVPPLBState refetch doesn't flash // the ‼️ icon. A real, persistent dataplane disagreement still shows // up the moment the grace window expires. export type FrontendState = { byName: Record; settling: Record>; }; const [state, setState] = createStore({ byName: {}, settling: {} }); export { state }; const SETTLE_GRACE_MS = 2000; // Outside-the-store map of pending auto-clear timers, keyed by // (maglevd, frontend). 
Timer ids aren't UI state so they don't // belong in the reactive store; keeping them in a plain Map lets a // fresh transition cancel and restart the timer cleanly. const settlingTimers = new Map>(); function settleKey(m: string, f: string): string { return `${m}\x00${f}`; } function markFrontendSettling(maglevd: string, frontend: string) { setState( produce((s) => { if (!s.settling[maglevd]) s.settling[maglevd] = {}; s.settling[maglevd][frontend] = true; }), ); const k = settleKey(maglevd, frontend); const existing = settlingTimers.get(k); if (existing) clearTimeout(existing); settlingTimers.set( k, setTimeout(() => { settlingTimers.delete(k); setState( produce((s) => { if (s.settling[maglevd]) delete s.settling[maglevd][frontend]; }), ); }, SETTLE_GRACE_MS), ); } // clearMaglevdSettling is called from applyLBState the moment a fresh // GetVPPLBState reconciliation lands. The dataplane data is now at // least as new as whatever transitions triggered the wait, so any // remaining bug-buckets discrepancy is real and worth surfacing. // The 2s safety timer in markFrontendSettling exists only as a // fallback for the case where VPP is disconnected (or the fetch is // failing) and an lb-state event would never arrive — without the // timer, settling would get stuck and the icon would silently // suppress real bugs. function clearMaglevdSettling(maglevd: string) { for (const [k, id] of settlingTimers) { if (k.startsWith(maglevd + "\x00")) { clearTimeout(id); settlingTimers.delete(k); } } setState( produce((s) => { if (s.settling[maglevd]) s.settling[maglevd] = {}; }), ); } export function replaceSnapshot(snap: StateSnapshot) { // Recompute effective weights + aggregate frontend state locally // from the snapshot's backends array, rather than trusting the // server's state field verbatim. 
The server can be stale (the // checker's frontendStates map is only updated on backend // transitions, not on weight changes), so deriving from our own // backend data is the only way to guarantee the display stays // consistent with reality. recomputeDerivedState(snap); setState( produce((s) => { s.byName[snap.maglevd.name] = snap; }), ); } export function replaceAll(snaps: StateSnapshot[]) { const byName: Record = {}; for (const s of snaps) { recomputeDerivedState(s); byName[s.maglevd.name] = s; } setState({ byName }); } export function applyBackendTransition(maglevd: string, p: BackendEventPayload) { setState( produce((s) => { const snap = s.byName[maglevd]; if (!snap) return; const b = snap.backends.find((x) => x.name === p.backend); if (!b) return; b.state = p.transition.to; // Derive enabled from state — see the matching comment in // cmd/frontend/client.go applyBackendTransition. state="disabled" // and enabled=false are two expressions of the same condition // in maglevd, so keeping them in sync locally closes a drift // window where the UI would show the wrong [disabled] tag. b.enabled = p.transition.to !== "disabled"; b.last_transition = p.transition; if (!b.transitions) b.transitions = []; b.transitions.push(p.transition); if (b.transitions.length > 20) { b.transitions = b.transitions.slice(b.transitions.length - 20); } // A backend state change can shift which pool is active and // therefore which pool-memberships get non-zero effective // weights, and in turn can flip the frontend's aggregate // state. Recompute for every frontend — not just the one // pointed at by this backend — because pool-failover is a // per-frontend computation and the same backend can appear in // multiple frontends with different pool placements. 
recomputeDerivedState(snap); }), ); // Mark every frontend that references this backend as settling so // the bug-buckets verdict is gated on the next fresh GetVPPLBState // reconciliation (or the 2s safety timer, whichever fires first). const snap = state.byName[maglevd]; if (snap) { for (const fe of snap.frontends) { if (fe.pools.some((pool) => pool.backends.some((pb) => pb.name === p.backend))) { markFrontendSettling(maglevd, fe.name); } } } } // Frontend-transition events arrive from the server's checker, but // the SPA no longer trusts their `to` field — recomputeDerivedState // walks the local backends array on every backend event and every // hydration to produce an up-to-date frontend state that the server // can't make stale. Kept as a named reducer so sse.ts's dispatch // table still has a landing spot for "frontend" events (they also // flow into the DebugPanel via pushEvent); the body is deliberately // empty — not a bug. export function applyFrontendTransition(_maglevd: string, _p: FrontendEventPayload) { // no-op — state is derived client-side, see recomputeDerivedState } // applyLBState merges the per-frontend bucket map for one maglevd // from a freshly-arrived "lb-state" SSE event. A null/undefined // per_frontend payload (sent on VPP disconnect or fetch failure) // clears the cached map so the SPA renders em-dashes in the buckets // column instead of stale numbers. // // The merge is done leaf-by-leaf rather than via wholesale assignment. // produce's proxy only emits a signal when a property is actually // written, so guarding each write with `!==` keeps unchanged numbers // (in particular every drained-to-0 backend) from invalidating their // downstream reactive reads. Without this, the periodic 30s refresh // and every same-value re-fetch would re-trigger the Flash animation // on every cell — which is exactly the visual storm we're avoiding. 
export function applyLBState(maglevd: string, p: LBStatePayload) { setState( produce((s) => { const snap = s.byName[maglevd]; if (!snap) return; const next = p.per_frontend; const empty = !next || Object.keys(next).length === 0; if (empty) { if (snap.lb_state !== undefined) snap.lb_state = undefined; return; } if (!snap.lb_state) { snap.lb_state = { per_frontend: {} }; } const cur = snap.lb_state.per_frontend; // Update / insert leaves that actually changed. for (const fe of Object.keys(next)) { if (!cur[fe]) cur[fe] = {}; const curRow = cur[fe]; const nextRow = next[fe]; for (const be of Object.keys(nextRow)) { if (curRow[be] !== nextRow[be]) curRow[be] = nextRow[be]; } for (const be of Object.keys(curRow)) { if (!(be in nextRow)) delete curRow[be]; } } // Drop frontends that disappeared from the new snapshot. for (const fe of Object.keys(cur)) { if (!(fe in next)) delete cur[fe]; } }), ); // A fresh lb-state event means the dataplane data is now at least // as new as anything we were waiting on — re-enable bug detection. clearMaglevdSettling(maglevd); } // lbBucketsFor looks up the bucket count VPP currently routes to a // given backend on a given frontend. Returns undefined when the // snapshot has no LB state at all (VPP disconnected, no fetch yet) or // when the backend isn't programmed into VPP for that VIP — the view // renders an em-dash in both cases. 
export function lbBucketsFor( snap: StateSnapshot | undefined, frontend: string, backend: string, ): number | undefined { return snap?.lb_state?.per_frontend?.[frontend]?.[backend]; } export function applyVPPStatus(maglevd: string, state: string) { setState( produce((s) => { const snap = s.byName[maglevd]; if (!snap) return; snap.vpp_state = state; }), ); } export function applyMaglevdStatus(maglevd: string, p: MaglevdStatusPayload) { setState( produce((s) => { const snap = s.byName[maglevd]; if (!snap) return; snap.maglevd.connected = p.connected; snap.maglevd.last_error = p.last_error; }), ); } // applyConfiguredWeight updates the configured weight of a specific // backend's pool-membership within a named frontend/pool, then // recomputes effective weights so pool-failover semantics stay // consistent. Called from the BackendActionsMenu after a successful // admin "set weight" POST so the UI reflects the change instantly // without waiting for the 30s refresh tick. Unlike the previous // log-event-driven reducer, this one is scoped to exactly the // pool-membership the operator edited, so it can't leak weights // across frontends that share the backend. export function applyConfiguredWeight( maglevd: string, frontend: string, pool: string, backend: string, weight: number, ) { setState( produce((s) => { const snap = s.byName[maglevd]; if (!snap) return; const fe = snap.frontends.find((f) => f.name === frontend); if (!fe) return; const p = fe.pools.find((x) => x.name === pool); if (!p) return; const pb = p.backends.find((x) => x.name === backend); if (!pb) return; pb.weight = weight; recomputeDerivedState(snap); }), ); markFrontendSettling(maglevd, frontend); } // FrontendHealth is the per-frontend "is everything actually working" // verdict computed from backend states, effective weights, and (when // available) the VPP bucket map. 
The cascade is intentionally // priority-ordered: a data-plane disagreement (control says serve, // VPP routes nothing) is the loudest signal because it usually means // something is broken in the sync path, not just an unhealthy backend. // // "ok" → all backends up, primary serving, every // eff>0 backend has VPP buckets>0 // "bug-buckets" → some backend with effective_weight>0 has 0 // buckets in VPP — control plane and data // plane disagree, almost always a bug // "primary-drained" → primary pool is not serving any traffic // (every backend in pool[0] has eff=0); the // frontend is on its fallback or fully down // "degraded" → at least one backend isn't 'up' but nothing // worse — typical maintenance / outage state // "unknown" → fallthrough; should be unreachable, kept as // a safety net for logic bugs in this function export type FrontendHealth = "ok" | "bug-buckets" | "primary-drained" | "degraded" | "unknown"; export function frontendHealth(snap: StateSnapshot, fe: FrontendSnapshot): FrontendHealth { const stateOf: Record = {}; for (const b of snap.backends) stateOf[b.name] = b.state; // The bucket check is only meaningful when we actually have an LB // state snapshot. On a fresh page load (or with VPP disconnected) // lb_state is undefined; in that window we fall back to "trust the // control plane" so the icon still settles to ✅ instead of // perpetual ❓ until the first GetVPPLBState round-trip. const lbAvailable = !!snap.lb_state; const feBuckets = snap.lb_state?.per_frontend?.[fe.name]; // Reactive read of the per-frontend settling flag. While true, // we're still waiting for the next GetVPPLBState reconciliation // after a recent control-plane change; the dataplane may be mid- // reconverge so any "weight>0 but buckets==0" we'd see here is // almost certainly a race, not a real bug. 
const settling = !!state.settling[snap.maglevd.name]?.[fe.name]; let anyDown = false; let dataplaneBug = false; for (const pool of fe.pools) { for (const pb of pool.backends) { if (stateOf[pb.name] !== "up") anyDown = true; if (!settling && lbAvailable && pb.effective_weight > 0) { const b = feBuckets?.[pb.name]; if (b === undefined || b === 0) dataplaneBug = true; } } } const primary = fe.pools[0]; const primaryHasWeights = !!primary && primary.backends.some((pb) => pb.weight > 0); const primaryAllZero = !primary || primary.backends.every((pb) => pb.effective_weight === 0); if (!anyDown && primaryHasWeights && !dataplaneBug) return "ok"; if (dataplaneBug) return "bug-buckets"; if (primaryAllZero) return "primary-drained"; if (anyDown) return "degraded"; return "unknown"; } export function frontendHealthIcon(snap: StateSnapshot, fe: FrontendSnapshot): string { switch (frontendHealth(snap, fe)) { case "ok": return "✅"; case "bug-buckets": return "‼️"; case "primary-drained": return "❗"; case "degraded": return "⚠️"; case "unknown": return "❓"; } } // Helpers used by views. // formatVIPAddress renders an address:port string with IPv6 addresses // wrapped in square brackets. This matches the URL-authority // convention (RFC 3986 §3.2.2) — without the brackets the colons in // an IPv6 literal are ambiguous against the port separator. IPv4 is // left bare. export function formatVIPAddress(address: string, port: number): string { if (address.includes(":")) return `[${address}]:${port}`; return `${address}:${port}`; } export function lastTransitionAge(t?: TransitionRecord): string { // Subscribe to the 1s ticker so the age string updates live as a // real-time countdown. No effect on layout — the age column is // unwrapped so the Flash animation never fires for these periodic // updates. 
tick(); if (!t || !t.at_unix_ns || t.at_unix_ns <= 0) return ""; const ms = Date.now() - t.at_unix_ns / 1e6; const totalSec = Math.floor(ms / 1000); // Clock skew between maglevd and the browser, plus the fact that // "1s ago" reads awkwardly, means anything at or below 1s is best // rendered as "now". Also catches negative values from a future- // skewed server clock. if (totalSec <= 1) return "now"; // Render the two most significant units so fresh transitions show // sub-minute detail ("10m30s") while older transitions round cleanly // ("1d16h"). A single unit is shown only below one minute, since // "Xs" has nothing smaller beneath it. const s = totalSec % 60; const totalMin = Math.floor(totalSec / 60); if (totalMin < 1) return `${totalSec}s ago`; const m = totalMin % 60; const totalHr = Math.floor(totalMin / 60); if (totalHr < 1) return `${m}m${s}s ago`; const h = totalHr % 24; const d = Math.floor(totalHr / 24); if (d < 1) return `${totalHr}h${m}m ago`; return `${d}d${h}h ago`; }