SPA (cmd/frontend/web):
- New "lb buckets" column backed by a 1s-debounced GetVPPLBState fetch loop with leading+trailing edge coalesce (a sketch of the debounce pattern follows below).
- Per-frontend health icon (✅/⚠️/❗/‼️/❓) in the Zippy header, gated by a settling flag that suppresses ‼️ until the next lb-state reconciliation after a backend transition or weight change.
- In-place leaf merge on lb-state so stable bucket values (e.g. "0") don't retrigger the Flash animation on every refresh.
- Zippy cards remember their open state in a cookie and default to closed on a fresh load; a fixed-width frontend-title-name plus a reserved icon slot keep headers lined up across all cards.
- Clock-drift watchdog in sse.ts that forces a fresh EventSource on laptop wake, so the broker emits a resync instead of hanging on a dead half-open socket.

Frontend service (cmd/frontend):
- maglevClient.lbStateLoop, triggered on backend transitions and vpp-connect, plus a best-effort fetch on refreshAll.
- Admin handlers explicitly wake the lb-state loop after lifecycle ops and set-weight (the latter emits no transition event on the maglevd side, so the WatchEvents path wouldn't have caught it).
- /favicon.ico served from the embedded web/public IPng logo.

VPP integration:
- internal/vpp/lbstate.go: dumpASesForVIP drops Pfx from the dump request (setting it silently wipes IPv4 replies in the LB plugin) and instead filters results by prefix on the response side, which also demuxes multi-VIP-on-same-port cases correctly.

maglevc:
- Walk now returns the unconsumed token tail; dispatch and the question listener reject unknown commands with a targeted error instead of dumping the full command tree prefixed with garbage.
- On '?', echo the current line (including the '?') before the help list so the output reads like birdc.

Checker / prober:
- internal/checker: ±10% jitter on NextInterval so probes don't all fire on the same tick after a restart.
- internal/prober: the HTTP User-Agent now carries the build version and project URL.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
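A minimal Go sketch of the leading+trailing debounce/coalesce pattern described above. The type, channel, and function names here are illustrative stand-ins, not the actual identifiers; the SPA side implements this in TypeScript, and the Go side lives in maglevClient (lbStateLoop / triggerLBStateFetch).

package debounce

import (
	"context"
	"fmt"
	"time"
)

// lbStateDebouncer is an illustrative stand-in for the real trigger plumbing.
type lbStateDebouncer struct {
	wake chan struct{} // buffered(1): a burst of triggers collapses into one pending wake
}

func newLBStateDebouncer() *lbStateDebouncer {
	return &lbStateDebouncer{wake: make(chan struct{}, 1)}
}

// trigger requests a refresh without ever blocking the caller.
func (d *lbStateDebouncer) trigger() {
	select {
	case d.wake <- struct{}{}:
	default: // a wake is already pending; coalesce
	}
}

// loop fetches on the leading edge, absorbs further triggers for one second,
// then fetches once more on the trailing edge if anything arrived in between.
func (d *lbStateDebouncer) loop(ctx context.Context, fetch func(context.Context)) {
	for {
		select {
		case <-ctx.Done():
			return
		case <-d.wake:
		}
		fetch(ctx) // leading edge

		timer := time.NewTimer(time.Second)
		coalesced := false
	settle:
		for {
			select {
			case <-ctx.Done():
				timer.Stop()
				return
			case <-d.wake:
				coalesced = true
			case <-timer.C:
				break settle
			}
		}
		if coalesced {
			fetch(ctx) // trailing edge
		}
	}
}

// Example usage: a burst of triggers yields at most one leading and one
// trailing fetch within the one-second settling window.
func Example() {
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()

	d := newLBStateDebouncer()
	go d.loop(ctx, func(context.Context) {
		fmt.Println("fetch lb-state", time.Now().Format(time.StampMilli))
	})

	d.trigger()
	d.trigger()
	d.trigger()
	<-ctx.Done()
}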
367 lines · 12 KiB · Go
// Copyright (c) 2026, Pim van Pelt <pim@ipng.ch>

package main

import (
	"context"
	"crypto/subtle"
	"encoding/json"
	"fmt"
	"io/fs"
	"log/slog"
	"net/http"
	"strings"
	"time"

	buildinfo "git.ipng.ch/ipng/vpp-maglev/cmd"
)

// adminCreds holds the basic-auth credentials for the /admin/ surface.
// Enabled is true when both the user and password env vars were set
// and non-empty at startup; when false, /admin/ is hidden entirely
// (returns 404) so operators who never intended to expose it don't
// see a teasing "unauthorized" response.
type adminCreds struct {
	User     string
	Password string
	Enabled  bool
}

func registerHandlers(mux *http.ServeMux, clients []*maglevClient, broker *Broker, admin adminCreds) {
	byName := make(map[string]*maglevClient, len(clients))
	for _, c := range clients {
		byName[c.name] = c
	}

	mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) {
		_, _ = w.Write([]byte("ok\n"))
	})

	// Favicon served from the same embedded dist tree Vite produced.
	// Browsers auto-fetch /favicon.ico from the document root regardless
	// of where the SPA itself is mounted, so we register a top-level
	// handler in addition to whatever /view/favicon.ico picks up via the
	// static file server below. Read once at registration so we don't
	// touch the embed.FS on every request, and serve with a long
	// max-age since the bytes never change for a given binary.
	if favicon, ferr := fs.ReadFile(webFS, "web/dist/favicon.ico"); ferr == nil {
		mux.HandleFunc("/favicon.ico", func(w http.ResponseWriter, _ *http.Request) {
			w.Header().Set("Content-Type", "image/x-icon")
			w.Header().Set("Cache-Control", "public, max-age=86400")
			_, _ = w.Write(favicon)
		})
	} else {
		slog.Warn("favicon-missing", "err", ferr)
	}

	mux.HandleFunc("/view/api/version", func(w http.ResponseWriter, _ *http.Request) {
		writeJSON(w, VersionInfo{
			Version:      buildinfo.Version(),
			Commit:       buildinfo.Commit(),
			Date:         buildinfo.Date(),
			AdminEnabled: admin.Enabled,
		})
	})

	mux.HandleFunc("/view/api/maglevds", func(w http.ResponseWriter, _ *http.Request) {
		infos := make([]MaglevdInfo, 0, len(clients))
		for _, c := range clients {
			infos = append(infos, c.Info())
		}
		writeJSON(w, infos)
	})

	mux.HandleFunc("/view/api/state", func(w http.ResponseWriter, _ *http.Request) {
		out := make([]*StateSnapshot, 0, len(clients))
		for _, c := range clients {
			out = append(out, c.Snapshot())
		}
		writeJSON(w, out)
	})

	mux.HandleFunc("/view/api/state/", func(w http.ResponseWriter, r *http.Request) {
		name := strings.TrimPrefix(r.URL.Path, "/view/api/state/")
		c, ok := byName[name]
		if !ok {
			http.NotFound(w, r)
			return
		}
		writeJSON(w, c.Snapshot())
	})

	mux.HandleFunc("/view/api/events", func(w http.ResponseWriter, r *http.Request) {
		serveSSE(w, r, broker)
	})

	// Static SPA served from the embedded dist fs, mounted under /view/.
	staticFS, err := fs.Sub(webFS, "web/dist")
	if err != nil {
		slog.Error("embed-subfs", "err", err)
		return
	}
	fileServer := http.FileServer(http.FS(staticFS))
	mux.Handle("/view/", http.StripPrefix("/view/", fileServer))

	// /admin/ serves the same SPA shell behind basic auth when the
	// credentials are configured. Only the index.html is served here —
	// all JS, CSS, and assets are referenced via absolute /view/assets/
	// URLs baked in by Vite, so they continue to load from the
	// unauthenticated /view/ tree. Read-only API calls also go to
	// /view/api/* unchanged. Mutation endpoints live under /admin/api/
	// so the same basic-auth middleware covers every writing path.
	if admin.Enabled {
		indexBytes, ierr := fs.ReadFile(staticFS, "index.html")
		if ierr != nil {
			slog.Error("embed-index", "err", ierr)
			return
		}
		serveIndex := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
			w.Header().Set("Content-Type", "text/html; charset=utf-8")
			w.Header().Set("Cache-Control", "no-store")
			_, _ = w.Write(indexBytes)
		})
		adminAPI := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			handleAdminAPI(w, r, byName)
		})
		realm := "maglevd-frontend admin"
		// Register /admin/api/ before /admin/ so the more specific
		// pattern wins in net/http's ServeMux.
		mux.Handle("/admin/api/", basicAuth(realm, admin.User, admin.Password, adminAPI))
		mux.Handle("/admin/", basicAuth(realm, admin.User, admin.Password, serveIndex))
	}

	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/" {
			http.Redirect(w, r, "/view/", http.StatusFound)
			return
		}
		http.NotFound(w, r)
	})
}

// handleAdminAPI dispatches mutation requests under /admin/api/.
//
// Supported shapes:
//
// POST /admin/api/{maglevd}/backend/{name}/{pause|resume|enable|disable}
// → fresh BackendSnapshot as JSON
//
// POST /admin/api/{maglevd}/frontend/{fe}/pool/{pool}/backend/{name}/weight
// body: {"weight": 0-100, "flush": bool}
// → fresh FrontendSnapshot as JSON
//
// For lifecycle actions the WatchEvents stream also delivers a
// backend-transition event; a weight change produces no event, since the
// config mutation doesn't flip the health state. The POST response is
// primarily for the originating SPA to learn about failures and to refresh
// effective weights immediately. Errors from the gRPC side are surfaced as
// 400 (bad request) or 502 (maglevd returned an error).
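//
// Example invocations (hypothetical maglevd, backend, pool, and host names):
//
//   curl -u "$ADMIN_USER:$ADMIN_PASS" -X POST \
//     https://lb.example.net/admin/api/lb0/backend/web1/pause
//   curl -u "$ADMIN_USER:$ADMIN_PASS" -X POST -d '{"weight": 25, "flush": false}' \
//     https://lb.example.net/admin/api/lb0/frontend/www/pool/default/backend/web1/weight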
func handleAdminAPI(w http.ResponseWriter, r *http.Request, byName map[string]*maglevClient) {
	if r.Method != http.MethodPost {
		w.Header().Set("Allow", "POST")
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}
	parts := strings.Split(strings.TrimPrefix(r.URL.Path, "/admin/api/"), "/")
	// Peel off the maglevd name (always the first segment).
	if len(parts) < 2 {
		http.NotFound(w, r)
		return
	}
	maglevd := parts[0]
	c, ok := byName[maglevd]
	if !ok {
		http.NotFound(w, r)
		return
	}
	rest := parts[1:]

	switch {
	// {maglevd}/backend/{name}/{action}
	case len(rest) == 3 && rest[0] == "backend":
		handleBackendLifecycle(w, r, c, rest[1], rest[2])
	// {maglevd}/frontend/{fe}/pool/{pool}/backend/{name}/weight
	case len(rest) == 7 && rest[0] == "frontend" && rest[2] == "pool" && rest[4] == "backend" && rest[6] == "weight":
		handleBackendWeight(w, r, c, rest[1], rest[3], rest[5])
	default:
		http.NotFound(w, r)
	}
}

func handleBackendLifecycle(w http.ResponseWriter, r *http.Request, c *maglevClient, name, action string) {
	switch action {
	case "pause", "resume", "enable", "disable":
	default:
		http.Error(w, fmt.Sprintf("unknown action %q", action), http.StatusBadRequest)
		return
	}
	ctx, cancel := context.WithTimeout(r.Context(), 10*time.Second)
	defer cancel()
	snap, err := c.BackendAction(ctx, name, action)
	if err != nil {
		slog.Warn("admin-backend-action", "maglevd", c.name, "backend", name, "action", action, "err", err)
		http.Error(w, err.Error(), http.StatusBadGateway)
		return
	}
	slog.Info("admin-backend-action",
		"maglevd", c.name, "backend", name, "action", action, "state", snap.State)
	// The maglevd→watch path will deliver a transition event that
	// also wakes the lb-state loop, but firing here too makes the
	// admin path self-contained and shaves the worst-case race
	// where the SPA is still waiting on the WatchEvents replay
	// when the POST response lands. The debouncer coalesces any
	// duplicate wake.
	c.triggerLBStateFetch()
	writeJSON(w, snap)
}

type setWeightBody struct {
	Weight int32 `json:"weight"`
	Flush  bool  `json:"flush"`
}

func handleBackendWeight(w http.ResponseWriter, r *http.Request, c *maglevClient, frontend, pool, backend string) {
	var body setWeightBody
	if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
		http.Error(w, fmt.Sprintf("bad json: %v", err), http.StatusBadRequest)
		return
	}
	if body.Weight < 0 || body.Weight > 100 {
		http.Error(w, fmt.Sprintf("weight %d out of range [0, 100]", body.Weight), http.StatusBadRequest)
		return
	}
	ctx, cancel := context.WithTimeout(r.Context(), 10*time.Second)
	defer cancel()
	snap, err := c.SetBackendWeight(ctx, frontend, pool, backend, body.Weight, body.Flush)
	if err != nil {
		slog.Warn("admin-set-weight",
			"maglevd", c.name, "frontend", frontend, "pool", pool, "backend", backend,
			"weight", body.Weight, "flush", body.Flush, "err", err)
		http.Error(w, err.Error(), http.StatusBadGateway)
		return
	}
	slog.Info("admin-set-weight",
		"maglevd", c.name, "frontend", frontend, "pool", pool, "backend", backend,
		"weight", body.Weight, "flush", body.Flush)
	// Weight changes never produce a transition event on the maglevd
	// side (the backend's state is unchanged), so the WatchEvents
	// stream won't wake the lb-state loop for us — without an explicit
	// trigger here the SPA's bucket column would stay stale until the
	// next 30s refresh tick. SyncLBStateVIP on the maglevd side has
	// already pushed the new weights into VPP synchronously, so the
	// fetch we kick off will see fresh post-mutation buckets.
	c.triggerLBStateFetch()
	writeJSON(w, snap)
}

// basicAuth wraps a handler in an HTTP basic-auth check. Uses
// subtle.ConstantTimeCompare so the comparison time does not depend on the
// credential contents (it still returns early on a length mismatch).
func basicAuth(realm, user, password string, next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		u, p, ok := r.BasicAuth()
		// Run both comparisons unconditionally and in constant time so a
		// wrong username costs the same as a wrong password; only the
		// combined boolean result matters.
		uOK := subtle.ConstantTimeCompare([]byte(u), []byte(user)) == 1
		pOK := subtle.ConstantTimeCompare([]byte(p), []byte(password)) == 1
		if !ok || !uOK || !pOK {
			w.Header().Set("WWW-Authenticate", fmt.Sprintf(`Basic realm=%q`, realm))
			http.Error(w, "unauthorized", http.StatusUnauthorized)
			return
		}
		next.ServeHTTP(w, r)
	})
}

func writeJSON(w http.ResponseWriter, v any) {
	w.Header().Set("Content-Type", "application/json")
	enc := json.NewEncoder(w)
	enc.SetEscapeHTML(false)
	if err := enc.Encode(v); err != nil {
		slog.Error("json-encode", "err", err)
	}
}

// serveSSE handles the long-lived /view/api/events stream. The operational
// requirements (retry hint, heartbeat, flush-after-write, X-Accel-Buffering,
// context-done teardown) are documented in PLAN_FRONTEND.md §SSE operational
// requirements.
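//
// Illustratively, a freshly connected client first sees the retry hint,
// then either a "resync" event or a replay of buffered events, and then a
// comment ping every 15 seconds:
//
//   retry: 2000
//
//   event: resync
//   data: {}
//
//   : ping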
func serveSSE(w http.ResponseWriter, r *http.Request, broker *Broker) {
	flusher, ok := w.(http.Flusher)
	if !ok {
		http.Error(w, "streaming unsupported", http.StatusInternalServerError)
		return
	}

	h := w.Header()
	h.Set("Content-Type", "text/event-stream")
	h.Set("Cache-Control", "no-cache")
	h.Set("Connection", "keep-alive")
	h.Set("X-Accel-Buffering", "no")
	w.WriteHeader(http.StatusOK)

	// Reconnect hint: EventSource default is 3–5s; 2s feels livelier.
	fmt.Fprintf(w, "retry: 2000\n\n")
	flusher.Flush()

	result := broker.Subscribe(r.Header.Get("Last-Event-ID"))
	defer broker.Unsubscribe(result.Channel)

	if result.NeedResync {
		// No id: line — the browser keeps whatever Last-Event-ID it had,
		// so subsequent reconnects compare against a real event ID.
		fmt.Fprintf(w, "event: resync\ndata: {}\n\n")
		flusher.Flush()
	}
	for _, ev := range result.ReplayEvents {
		if err := writeEvent(w, ev); err != nil {
			return
		}
		flusher.Flush()
	}

	heartbeat := time.NewTicker(15 * time.Second)
	defer heartbeat.Stop()

	for {
		select {
		case <-r.Context().Done():
			return
		case ev, ok := <-result.Channel:
			if !ok {
				return
			}
			if err := writeEvent(w, ev); err != nil {
				return
			}
			flusher.Flush()
		case <-heartbeat.C:
			if _, err := fmt.Fprintf(w, ": ping\n\n"); err != nil {
				return
			}
			flusher.Flush()
		}
	}
}

func writeEvent(w http.ResponseWriter, ev deliveredEvent) error {
	// "resync" goes out as a named SSE event so the SPA's existing
	// addEventListener("resync", ...) handler fires (and not the
	// default onmessage path). Every other event type keeps the
	// default onmessage path with a JSON body. We still emit an id
	// so a reconnecting browser can replay from the right point in
	// the ring; the resync handler is idempotent (a duplicate
	// replay just triggers a redundant fetchState).
	if ev.Event.Type == "resync" {
		_, err := fmt.Fprintf(w, "id: %s\nevent: resync\ndata: {}\n\n", ev.ID)
		return err
	}
	body, err := json.Marshal(ev.Event)
	if err != nil {
		return err
	}
	_, err = fmt.Fprintf(w, "id: %s\ndata: %s\n\n", ev.ID, body)
	return err
}