Files
vpp-maglev/cmd/frontend/web/src/stores/state.ts

481 lines
19 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// SPDX-License-Identifier: Apache-2.0
import { createStore, produce } from "solid-js/store";
import type {
BackendEventPayload,
FrontendEventPayload,
FrontendSnapshot,
LBStatePayload,
MaglevdStatusPayload,
StateSnapshot,
TransitionRecord,
} from "../types";
import { tick } from "./tick";
// recomputeDerivedState mirrors the server-side
// health.EffectiveWeights / ActivePoolIndex / ComputeFrontendState
// logic so the SPA can keep pool.effective_weight AND the
// per-frontend aggregate state correct the moment any backend
// transitions or any configured weight changes, without waiting for
// the 30s refresh. Walking every frontend is cheap — O(frontends ×
// pools × backends-per-pool) with tiny constants — and it's
// strictly a function of the backend state map + configured
// weights, so there's no risk of drift vs. the server as long as
// the rules stay identical. The SPA is the authoritative source of
// truth for *display* state: the server's cached frontendStates
// field can be stale (e.g. after a SetFrontendPoolBackendWeight
// call that doesn't re-run updateFrontendState, or after a long-
// lived WatchEvents stream where a past transition corrupted the
// client's cache) and the SPA recomputes from its own live
// backends array to avoid inheriting any staleness.
//
// Effective weight rule: a backend gets its configured pool weight
// iff it is up AND belongs to the currently-active pool; everything
// else is 0. The active pool is the first pool containing a backend
// that is both up AND has a non-zero configured weight — a pool
// whose up backends are all weight=0 contributes no serving
// capacity and gets skipped over in priority failover. Kept in
// lock-step with internal/health/weights.go ActivePoolIndex.
//
// Frontend state rule: unknown if no backends or every referenced
// backend is still in StateUnknown; up if any backend in any pool
// has effective_weight > 0; otherwise down. Kept in lock-step with
// internal/health/weights.go ComputeFrontendState.
function recomputeDerivedState(snap: StateSnapshot) {
  // Index backend states by name for O(1) lookups while walking pools.
  const backendState: Record<string, string> = {};
  for (const b of snap.backends) backendState[b.name] = b.state;
  for (const fe of snap.frontends) {
    // Active pool = first pool containing a backend that is both up and
    // has a non-zero configured weight (a pool whose up backends are all
    // weight=0 serves nothing and is skipped in priority failover).
    // Defaults to pool 0 when no pool is serving — mirrors
    // internal/health/weights.go ActivePoolIndex.
    const servingIdx = fe.pools.findIndex((pool) =>
      pool.backends.some((pb) => backendState[pb.name] === "up" && pb.weight > 0),
    );
    const activePool = servingIdx === -1 ? 0 : servingIdx;
    // Second pass: assign effective weights and gather the aggregate
    // inputs. `visited` dedupes backends that appear in several pools so
    // the all-unknown check counts each backend once.
    const visited = new Set<string>();
    let anyEffective = false;
    let allUnknown = true;
    fe.pools.forEach((pool, i) => {
      for (const pb of pool.backends) {
        const st = backendState[pb.name];
        // Configured weight applies iff up AND in the active pool.
        pb.effective_weight = i === activePool && st === "up" ? pb.weight : 0;
        if (pb.effective_weight > 0) anyEffective = true;
        if (!visited.has(pb.name)) {
          visited.add(pb.name);
          if (st !== "unknown") allUnknown = false;
        }
      }
    });
    // Aggregate frontend state — mirrors ComputeFrontendState:
    // unknown (no backends / all unknown) → up (any serving) → down.
    if (visited.size === 0 || allUnknown) {
      fe.state = "unknown";
    } else if (anyEffective) {
      fe.state = "up";
    } else {
      fe.state = "down";
    }
  }
}
// FrontendState keys snapshots by maglevd name. A single store drives the
// whole UI; reducers produce() into the right branch.
//
// settling is a per-(maglevd, frontend) flag flipped to true on any
// event that changes which backends should be serving — backend
// transitions, configured weight edits — and auto-cleared after a
// fixed grace window. While true, frontendHealth suppresses the
// bug-buckets verdict so a transient race between the new control-
// plane state and the lagging GetVPPLBState refetch doesn't flash
// the ‼️ icon. A real, persistent dataplane disagreement still shows
// up the moment the grace window expires.
export type FrontendState = {
  // Full per-maglevd snapshot, keyed by maglevd name.
  byName: Record<string, StateSnapshot>;
  // settling[maglevd][frontend] === true while a control-plane change is
  // awaiting the next GetVPPLBState reconciliation (see header comment).
  settling: Record<string, Record<string, true>>;
};
const [state, setState] = createStore<FrontendState>({ byName: {}, settling: {} });
export { state };
// Grace window after a control-plane change during which the
// bug-buckets verdict is suppressed (see markFrontendSettling).
const SETTLE_GRACE_MS = 2000;
// Outside-the-store map of pending auto-clear timers, keyed by
// (maglevd, frontend). Timer ids aren't UI state so they don't
// belong in the reactive store; keeping them in a plain Map lets a
// fresh transition cancel and restart the timer cleanly.
const settlingTimers = new Map<string, ReturnType<typeof setTimeout>>();
// settleKey builds the composite (maglevd, frontend) map key. NUL can't
// appear in either name, so it is an unambiguous separator.
function settleKey(m: string, f: string): string {
  return [m, f].join("\x00");
}
// markFrontendSettling flips the reactive settling flag for one
// (maglevd, frontend) pair and (re)arms its auto-clear timer so the
// grace window restarts on every fresh control-plane change.
function markFrontendSettling(maglevd: string, frontend: string) {
  // Set the flag first so frontendHealth gates immediately.
  setState(
    produce((draft) => {
      if (!draft.settling[maglevd]) draft.settling[maglevd] = {};
      draft.settling[maglevd][frontend] = true;
    }),
  );
  // Cancel any pending timer for this pair, then start a fresh one —
  // a burst of transitions extends the window instead of letting the
  // oldest timer clear the flag early.
  const key = settleKey(maglevd, frontend);
  const pending = settlingTimers.get(key);
  if (pending !== undefined) clearTimeout(pending);
  const timer = setTimeout(() => {
    settlingTimers.delete(key);
    setState(
      produce((draft) => {
        const row = draft.settling[maglevd];
        if (row) delete row[frontend];
      }),
    );
  }, SETTLE_GRACE_MS);
  settlingTimers.set(key, timer);
}
// clearMaglevdSettling is called from applyLBState the moment a fresh
// GetVPPLBState reconciliation lands. The dataplane data is now at
// least as new as whatever transitions triggered the wait, so any
// remaining bug-buckets discrepancy is real and worth surfacing.
// The 2s safety timer in markFrontendSettling exists only as a
// fallback for the case where VPP is disconnected (or the fetch is
// failing) and an lb-state event would never arrive — without the
// timer, settling would get stuck and the icon would silently
// suppress real bugs.
function clearMaglevdSettling(maglevd: string) {
for (const [k, id] of settlingTimers) {
if (k.startsWith(maglevd + "\x00")) {
clearTimeout(id);
settlingTimers.delete(k);
}
}
setState(
produce((s) => {
if (s.settling[maglevd]) s.settling[maglevd] = {};
}),
);
}
// replaceSnapshot stores a freshly-hydrated snapshot for one maglevd.
export function replaceSnapshot(snap: StateSnapshot) {
  // Never trust the server's precomputed frontend state / effective
  // weights verbatim — the checker's frontendStates map is only
  // refreshed on backend transitions, not weight edits, so it can be
  // stale. Re-derive everything from the snapshot's own backends array
  // before storing it.
  recomputeDerivedState(snap);
  setState(
    produce((draft) => {
      draft.byName[snap.maglevd.name] = snap;
    }),
  );
}
// replaceAll swaps in the full set of snapshots (initial hydration /
// periodic refresh), re-deriving display state for each one first.
export function replaceAll(snaps: StateSnapshot[]) {
  const byName: Record<string, StateSnapshot> = {};
  snaps.forEach((snap) => {
    recomputeDerivedState(snap);
    byName[snap.maglevd.name] = snap;
  });
  // Top-level setState merges shallowly, so `settling` is untouched.
  setState({ byName });
}
// applyBackendTransition applies one backend state-transition SSE event
// to the cached snapshot and re-derives all downstream display state.
export function applyBackendTransition(maglevd: string, p: BackendEventPayload) {
  setState(
    produce((draft) => {
      const snap = draft.byName[maglevd];
      if (!snap) return;
      const backend = snap.backends.find((x) => x.name === p.backend);
      if (!backend) return;
      backend.state = p.transition.to;
      // state="disabled" and enabled=false are two expressions of the
      // same condition in maglevd (see the matching comment in
      // cmd/frontend/client.go applyBackendTransition); deriving one
      // from the other closes a drift window where the UI would show
      // the wrong [disabled] tag.
      backend.enabled = p.transition.to !== "disabled";
      backend.last_transition = p.transition;
      // Append to a bounded (last-20) transition history for the UI.
      if (!backend.transitions) backend.transitions = [];
      backend.transitions.push(p.transition);
      if (backend.transitions.length > 20) {
        backend.transitions = backend.transitions.slice(-20);
      }
      // A backend transition can move the active pool and flip the
      // aggregate state of *any* frontend — the same backend may sit in
      // multiple frontends with different pool placements — so recompute
      // for every frontend, not just one.
      recomputeDerivedState(snap);
    }),
  );
  // Gate the bug-buckets verdict for every frontend referencing this
  // backend until the next fresh GetVPPLBState reconciliation (or the
  // grace timer, whichever fires first).
  const snap = state.byName[maglevd];
  if (!snap) return;
  for (const fe of snap.frontends) {
    const references = fe.pools.some((pool) =>
      pool.backends.some((pb) => pb.name === p.backend),
    );
    if (references) markFrontendSettling(maglevd, fe.name);
  }
}
// Frontend-transition events arrive from the server's checker, but
// the SPA no longer trusts their `to` field — recomputeDerivedState
// walks the local backends array on every backend event and every
// hydration to produce an up-to-date frontend state that the server
// can't make stale. Kept as a named reducer so sse.ts's dispatch
// table still has a landing spot for "frontend" events (they also
// flow into the DebugPanel via pushEvent); the body is deliberately
// empty — not a bug.
export function applyFrontendTransition(_maglevd: string, _p: FrontendEventPayload) {
  // Intentionally empty — frontend aggregate state is derived
  // client-side (recomputeDerivedState) from the live backends array,
  // so the server's `to` field is ignored. The export stays so sse.ts's
  // dispatch table has a handler for "frontend" events.
}
// applyLBState merges the per-frontend bucket map for one maglevd
// from a freshly-arrived "lb-state" SSE event. A null/undefined
// per_frontend payload (sent on VPP disconnect or fetch failure)
// clears the cached map so the SPA renders em-dashes in the buckets
// column instead of stale numbers.
//
// The merge is done leaf-by-leaf rather than via wholesale assignment.
// produce's proxy only emits a signal when a property is actually
// written, so guarding each write with `!==` keeps unchanged numbers
// (in particular every drained-to-0 backend) from invalidating their
// downstream reactive reads. Without this, the periodic 30s refresh
// and every same-value re-fetch would re-trigger the Flash animation
// on every cell — which is exactly the visual storm we're avoiding.
export function applyLBState(maglevd: string, p: LBStatePayload) {
  setState(
    produce((s) => {
      const snap = s.byName[maglevd];
      if (!snap) return; // unknown maglevd — nothing to merge into
      const next = p.per_frontend;
      // null/undefined/{} all mean "no dataplane data": clear the cached
      // map so the buckets column renders em-dashes, not stale numbers.
      const empty = !next || Object.keys(next).length === 0;
      if (empty) {
        if (snap.lb_state !== undefined) snap.lb_state = undefined;
        return;
      }
      if (!snap.lb_state) {
        snap.lb_state = { per_frontend: {} };
      }
      const cur = snap.lb_state.per_frontend;
      // Update / insert leaves that actually changed. Every write is
      // guarded with !== so produce's proxy only emits signals for real
      // changes — see the header comment re: the Flash animation storm.
      for (const fe of Object.keys(next)) {
        if (!cur[fe]) cur[fe] = {};
        const curRow = cur[fe];
        const nextRow = next[fe];
        for (const be of Object.keys(nextRow)) {
          if (curRow[be] !== nextRow[be]) curRow[be] = nextRow[be];
        }
        // Remove backends no longer programmed for this frontend.
        for (const be of Object.keys(curRow)) {
          if (!(be in nextRow)) delete curRow[be];
        }
      }
      // Drop frontends that disappeared from the new snapshot.
      for (const fe of Object.keys(cur)) {
        if (!(fe in next)) delete cur[fe];
      }
    }),
  );
  // A fresh lb-state event means the dataplane data is now at least
  // as new as anything we were waiting on — re-enable bug detection.
  clearMaglevdSettling(maglevd);
}
// lbBucketsFor looks up the bucket count VPP currently routes to a
// given backend on a given frontend. Returns undefined when the
// snapshot has no LB state at all (VPP disconnected, no fetch yet) or
// when the backend isn't programmed into VPP for that VIP — the view
// renders an em-dash in both cases.
export function lbBucketsFor(
  snap: StateSnapshot | undefined,
  frontend: string,
  backend: string,
): number | undefined {
  // Undefined at any level (no LB state yet, VIP unknown, backend not
  // programmed into VPP) falls through to undefined → em-dash in views.
  const row = snap?.lb_state?.per_frontend?.[frontend];
  return row?.[backend];
}
// applyVPPStatus records the latest VPP connection-state string for one
// maglevd's cached snapshot; no-op if the maglevd isn't known yet.
//
// NOTE: the parameter was previously named `state`, shadowing the
// module-level reactive store of the same name — renamed to `vppState`
// to remove the shadow. Call sites are positional, so this is
// backward-compatible.
export function applyVPPStatus(maglevd: string, vppState: string) {
  setState(
    produce((s) => {
      const snap = s.byName[maglevd];
      if (!snap) return;
      snap.vpp_state = vppState;
    }),
  );
}
// applyMaglevdStatus mirrors a maglevd connection-status event onto the
// cached snapshot so the header chips update without a full refresh.
export function applyMaglevdStatus(maglevd: string, p: MaglevdStatusPayload) {
  setState(
    produce((draft) => {
      const snap = draft.byName[maglevd];
      if (snap === undefined) return;
      snap.maglevd.connected = p.connected;
      snap.maglevd.last_error = p.last_error;
    }),
  );
}
// applyConfiguredWeight updates the configured weight of a specific
// backend's pool-membership within a named frontend/pool, then
// recomputes effective weights so pool-failover semantics stay
// consistent. Called from the BackendActionsMenu after a successful
// admin "set weight" POST so the UI reflects the change instantly
// without waiting for the 30s refresh tick. Unlike the previous
// log-event-driven reducer, this one is scoped to exactly the
// pool-membership the operator edited, so it can't leak weights
// across frontends that share the backend.
export function applyConfiguredWeight(
  maglevd: string,
  frontend: string,
  pool: string,
  backend: string,
  weight: number,
) {
  setState(
    produce((draft) => {
      const snap = draft.byName[maglevd];
      if (!snap) return;
      // Locate exactly the pool-membership the operator edited —
      // scoping here prevents leaking the weight into other frontends
      // that share the same backend.
      const pb = snap.frontends
        .find((f) => f.name === frontend)
        ?.pools.find((x) => x.name === pool)
        ?.backends.find((x) => x.name === backend);
      if (!pb) return;
      pb.weight = weight;
      // A weight change can shift the active pool, so re-derive
      // effective weights + aggregate frontend state immediately.
      recomputeDerivedState(snap);
    }),
  );
  // Gate the bug-buckets verdict until the dataplane catches up.
  markFrontendSettling(maglevd, frontend);
}
// FrontendHealth is the per-frontend "is everything actually working"
// verdict computed from backend states, effective weights, and (when
// available) the VPP bucket map. The cascade is intentionally
// priority-ordered: a data-plane disagreement (control says serve,
// VPP routes nothing) is the loudest signal because it usually means
// something is broken in the sync path, not just an unhealthy backend.
//
// "ok" → all backends up, primary serving, every
// eff>0 backend has VPP buckets>0
// "bug-buckets" → some backend with effective_weight>0 has 0
// buckets in VPP — control plane and data
// plane disagree, almost always a bug
// "primary-drained" → primary pool is not serving any traffic
// (every backend in pool[0] has eff=0); the
// frontend is on its fallback or fully down
// "degraded" → at least one backend isn't 'up' but nothing
// worse — typical maintenance / outage state
// "unknown" → fallthrough; should be unreachable, kept as
// a safety net for logic bugs in this function
export type FrontendHealth = "ok" | "bug-buckets" | "primary-drained" | "degraded" | "unknown";
export function frontendHealth(snap: StateSnapshot, fe: FrontendSnapshot): FrontendHealth {
const stateOf: Record<string, string> = {};
for (const b of snap.backends) stateOf[b.name] = b.state;
// The bucket check is only meaningful when we actually have an LB
// state snapshot. On a fresh page load (or with VPP disconnected)
// lb_state is undefined; in that window we fall back to "trust the
// control plane" so the icon still settles to ✅ instead of
// perpetual ❓ until the first GetVPPLBState round-trip.
const lbAvailable = !!snap.lb_state;
const feBuckets = snap.lb_state?.per_frontend?.[fe.name];
// Reactive read of the per-frontend settling flag. While true,
// we're still waiting for the next GetVPPLBState reconciliation
// after a recent control-plane change; the dataplane may be mid-
// reconverge so any "weight>0 but buckets==0" we'd see here is
// almost certainly a race, not a real bug.
const settling = !!state.settling[snap.maglevd.name]?.[fe.name];
let anyDown = false;
let dataplaneBug = false;
for (const pool of fe.pools) {
for (const pb of pool.backends) {
if (stateOf[pb.name] !== "up") anyDown = true;
if (!settling && lbAvailable && pb.effective_weight > 0) {
const b = feBuckets?.[pb.name];
if (b === undefined || b === 0) dataplaneBug = true;
}
}
}
const primary = fe.pools[0];
const primaryHasWeights = !!primary && primary.backends.some((pb) => pb.weight > 0);
const primaryAllZero = !primary || primary.backends.every((pb) => pb.effective_weight === 0);
if (!anyDown && primaryHasWeights && !dataplaneBug) return "ok";
if (dataplaneBug) return "bug-buckets";
if (primaryAllZero) return "primary-drained";
if (anyDown) return "degraded";
return "unknown";
}
export function frontendHealthIcon(snap: StateSnapshot, fe: FrontendSnapshot): string {
switch (frontendHealth(snap, fe)) {
case "ok":
return "✅";
case "bug-buckets":
return "‼️";
case "primary-drained":
return "❗";
case "degraded":
return "⚠️";
case "unknown":
return "❓";
}
}
// Helpers used by views.
// formatVIPAddress renders an address:port string with IPv6 addresses
// wrapped in square brackets. This matches the URL-authority
// convention (RFC 3986 §3.2.2) — without the brackets the colons in
// an IPv6 literal are ambiguous against the port separator. IPv4 is
// left bare.
export function formatVIPAddress(address: string, port: number): string {
  // An IPv6 literal contains colons, so the RFC 3986 authority form
  // needs brackets to disambiguate the port separator; IPv4 stays bare.
  return address.includes(":") ? `[${address}]:${port}` : `${address}:${port}`;
}
// lastTransitionAge renders a human age string ("now", "42s ago",
// "10m30s ago", "3h12m ago", "1d16h ago") for a transition record.
export function lastTransitionAge(t?: TransitionRecord): string {
  // Subscribe to the 1s ticker so the string updates live. The age
  // column is unwrapped, so these periodic updates never fire Flash.
  tick();
  const ns = t?.at_unix_ns;
  if (!ns || ns <= 0) return "";
  const totalSec = Math.floor((Date.now() - ns / 1e6) / 1000);
  // Clock skew plus the awkwardness of "1s ago" means anything at or
  // below 1s — including negatives from a future-skewed server clock —
  // renders as "now".
  if (totalSec <= 1) return "now";
  // Show the two most significant units: fresh transitions keep
  // sub-minute detail ("10m30s"), older ones round cleanly ("1d16h").
  // Only below one minute is a single unit shown ("Xs").
  const totalMin = Math.floor(totalSec / 60);
  if (totalMin < 1) return `${totalSec}s ago`;
  const totalHr = Math.floor(totalMin / 60);
  if (totalHr < 1) return `${totalMin % 60}m${totalSec % 60}s ago`;
  const days = Math.floor(totalHr / 24);
  if (days < 1) return `${totalHr}h${totalMin % 60}m ago`;
  return `${days}d${totalHr % 24}h ago`;
}