Introduces maglev-frontend, a responsive, real-time web dashboard for one
or more running maglevd instances. Source lives at cmd/frontend/; the
built binary is maglev-frontend. It is a single Go process with the
SolidJS SPA embedded via //go:embed — no runtime file dependencies.
Architecture
- One persistent gRPC connection per configured maglevd (-server A,B,C).
Each connection runs three background loops: a WatchEvents stream
subscribed at log_level=debug for live events, a 30s refresh loop as
a safety net for drift, and a 5s health loop that surfaces connection
drops quickly.
- In-process pub/sub broker with a 30s / 2000-event replay ring using
<epoch>-<seq> monotonic IDs. Short browser reconnects (nginx idle,
wifi flap, laptop wake) silently replay buffered events via the
EventSource Last-Event-ID header; longer outages or frontend restarts
fall through to a "resync" event that triggers a full state refetch.
- HTTP surface: /view/ (SPA), /view/api/state, /view/api/state/{name},
/view/api/maglevds, /view/api/version, /view/api/events (SSE),
/healthz, and an /admin/* placeholder returning 501 for a future
basic-auth mutation surface.
- SSE handler follows the full operational checklist: retry hint, 15s
: ping heartbeat, Flush after every write, r.Context().Done() teardown,
X-Accel-Buffering: no, and no gzip.
SolidJS SPA (cmd/frontend/web/, Vite + TypeScript)
- solid-js/store for a reactive per-maglevd state tree; reducers apply
backend transitions, maglevd-status flips, and resync refetches.
- Scope selector tabs for multi-maglevd support, per-maglevd frontend
cards with pool tables showing state, configured weight, effective
weight, and last-transition age.
- ProbeHeartbeat component turns a middle-dot into ❤️ on probe-start and
back on probe-done, driven by real log events; fixed-size wrapper so
the emoji swap doesn't jiggle the row.
- Flash wrapper animates any primitive on change (1s yellow fade via
Web Animations API, skipped on first mount). Wired into the state
badge, configured weight, and effective weight columns.
- DebugPanel: chronological rolling event tail with tail-style auto-
scroll, pause/resume, and scope/firehose filter. Syntactic highlight
for vpp-lb-sync-* events with fixed-order attribute formatting.
- Live effective_weight updates: vpp-lb-sync-as-added/removed/weight-
updated log events are routed through a reducer that walks the
snapshot's pool rows and sets effective_weight on every match
without waiting for the 30s refresh.
- Header shows build version + commit with build date in a tooltip,
fetched once from /view/api/version on mount.
- Prettier wired in as the web-side fixstyle; make fixstyle now tidies
both Go and web in one shot via a new fixstyle-web target.
Per-mutation VPP LB sync logging
- Promotes the addVIP/delVIP/addAS/delAS/setASWeight helpers from
slog.Debug to slog.Info and renames them from vpp-lbsync-* to
vpp-lb-sync-{vip-added,vip-removed,as-added,as-removed,as-weight-
updated}. Matching rename for vpp-lb-sync-start / -done / -error /
-vip-recreate. The Prometheus metric name (maglev_vpp_lbsync_total)
is left alone to preserve dashboards.
- setASWeight now takes the prior weight so the event can emit
from=X to=Y and the UI can show the delta.
- The vip field in every event is the bare address (no /32 or /128
mask), matching the CLI output style.
- Any listener on the gRPC WatchEvents stream — CLI watch events or
maglev-frontend — now sees every VIP/AS dataplane change in real
time without needing to raise the log level.
Build and tooling
- Makefile: maglev-frontend added to BINARIES; build / build-amd64 /
build-arm64 emit the binary alongside maglevd and maglevc. A new
maglev-frontend-web target rebuilds the SolidJS bundle via npm.
- web/dist/ is tracked so a bare `go build` keeps working for Go-only
contributors and CI.
- .gitignore skips cmd/frontend/web/node_modules/.
Stability fixes
- maglevd's WatchEvents synthetic replay events (from==to, at_unix_ns=0)
were corrupting the frontend's LastTransition cache with at=0,
rendering as "20555d ago" in the browser. Client now skips synthetic
events: the cache comes from refreshAll and doesn't need them.
- Frontends, Backends, and HealthChecks are now served in the order
returned by the corresponding List* RPC instead of Go map iteration
order, so reloads and refreshes keep the SPA stable.
501 lines
14 KiB
Go
// Copyright (c) 2026, Pim van Pelt <pim@ipng.ch>
|
|
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"net"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"google.golang.org/grpc"
|
|
"google.golang.org/grpc/credentials/insecure"
|
|
|
|
"git.ipng.ch/ipng/vpp-maglev/internal/grpcapi"
|
|
)
|
|
|
|
// maglevClient is a per-maglevd gRPC client plus cache and background loops.
// One instance exists per configured maglevd; Start launches its watch,
// refresh, and health loops, and the REST handlers read its cached state
// through Snapshot/Info.
type maglevClient struct {
	name string // short display name derived from the address (see hostnameOf)
	address string // maglevd gRPC endpoint as given on the command line
	conn *grpc.ClientConn
	api grpcapi.MaglevClient
	broker *Broker // pub/sub fan-out for browser-facing events

	mu sync.RWMutex // guards the fields below
	connected bool // last observed connection status
	lastErr string // most recent connection error; "" while connected
	cache cachedState // latest per-maglevd state snapshot
}
|
|
|
|
// cachedState is the per-maglevd snapshot served via the REST handlers.
// Frontends / Backends / HealthChecks are maps for O(1) lookup from the
// event path, and the *Order slices preserve the order returned by the
// corresponding List* RPC so the UI renders in a stable order across
// reloads instead of Go map iteration's randomised order.
type cachedState struct {
	Frontends map[string]*FrontendSnapshot // keyed by frontend name
	FrontendsOrder []string // RPC-defined render order for Frontends
	Backends map[string]*BackendSnapshot // keyed by backend name
	BackendsOrder []string // RPC-defined render order for Backends
	HealthChecks map[string]*HealthCheckSnapshot // keyed by healthcheck name
	HealthCheckOrder []string // RPC-defined render order for HealthChecks
	VPPInfo *VPPInfoSnapshot // nil when GetVPPInfo failed on the last refresh
	LastRefresh time.Time // when refreshAll last completed successfully
}
|
|
|
|
func newMaglevClient(address string, broker *Broker) (*maglevClient, error) {
|
|
conn, err := grpc.NewClient(address,
|
|
grpc.WithTransportCredentials(insecure.NewCredentials()))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return &maglevClient{
|
|
name: hostnameOf(address),
|
|
address: address,
|
|
conn: conn,
|
|
api: grpcapi.NewMaglevClient(conn),
|
|
broker: broker,
|
|
cache: cachedState{
|
|
Frontends: map[string]*FrontendSnapshot{},
|
|
Backends: map[string]*BackendSnapshot{},
|
|
HealthChecks: map[string]*HealthCheckSnapshot{},
|
|
},
|
|
}, nil
|
|
}
|
|
|
|
// hostnameOf strips the port from an address and returns a short display
// name. For DNS names we take the first label ("lb-ams.internal:9090" →
// "lb-ams"). For IP literals we return the full address so we don't
// accidentally truncate "127.0.0.1" to "127".
func hostnameOf(address string) string {
	host := address
	if h, _, err := net.SplitHostPort(address); err == nil {
		host = h
	}
	// Drop IPv6 brackets left over when the input had no port.
	host = strings.TrimPrefix(strings.TrimSuffix(host, "]"), "[")
	if net.ParseIP(host) != nil {
		// IP literal: return it whole.
		return host
	}
	// DNS name: keep only the first label.
	label, _, _ := strings.Cut(host, ".")
	return label
}
|
|
|
|
// Close tears down the underlying gRPC connection. The close error is
// deliberately discarded — there is nothing useful to do with it here.
func (c *maglevClient) Close() {
	_ = c.conn.Close()
}
|
|
|
|
// Start launches the three background loops for this maglevd: the
// WatchEvents stream, the periodic full refresh, and the health probe.
// All three run until ctx is cancelled.
func (c *maglevClient) Start(ctx context.Context) {
	go c.watchLoop(ctx)
	go c.refreshLoop(ctx)
	go c.healthLoop(ctx)
}
|
|
|
|
func (c *maglevClient) setConnected(ok bool, errMsg string) {
|
|
c.mu.Lock()
|
|
prev := c.connected
|
|
c.connected = ok
|
|
c.lastErr = errMsg
|
|
c.mu.Unlock()
|
|
if prev != ok {
|
|
payload, _ := json.Marshal(MaglevdStatusPayload{Connected: ok, LastError: errMsg})
|
|
c.broker.Publish(BrowserEvent{
|
|
Maglevd: c.name,
|
|
Type: "maglevd-status",
|
|
AtUnixNs: time.Now().UnixNano(),
|
|
Payload: payload,
|
|
})
|
|
}
|
|
}
|
|
|
|
// Info returns the current connection status for this maglevd.
|
|
func (c *maglevClient) Info() MaglevdInfo {
|
|
c.mu.RLock()
|
|
defer c.mu.RUnlock()
|
|
return MaglevdInfo{
|
|
Name: c.name,
|
|
Address: c.address,
|
|
Connected: c.connected,
|
|
LastError: c.lastErr,
|
|
}
|
|
}
|
|
|
|
// Snapshot returns a deep-ish copy of the cached state for REST handlers.
|
|
// Iteration order follows the corresponding *Order slice so the UI sees a
|
|
// stable, RPC-defined order across reloads.
|
|
func (c *maglevClient) Snapshot() *StateSnapshot {
|
|
c.mu.RLock()
|
|
defer c.mu.RUnlock()
|
|
snap := &StateSnapshot{
|
|
Maglevd: MaglevdInfo{
|
|
Name: c.name,
|
|
Address: c.address,
|
|
Connected: c.connected,
|
|
LastError: c.lastErr,
|
|
},
|
|
Frontends: make([]*FrontendSnapshot, 0, len(c.cache.FrontendsOrder)),
|
|
Backends: make([]*BackendSnapshot, 0, len(c.cache.BackendsOrder)),
|
|
HealthChecks: make([]*HealthCheckSnapshot, 0, len(c.cache.HealthCheckOrder)),
|
|
VPPInfo: c.cache.VPPInfo,
|
|
}
|
|
for _, name := range c.cache.FrontendsOrder {
|
|
if f, ok := c.cache.Frontends[name]; ok {
|
|
snap.Frontends = append(snap.Frontends, f)
|
|
}
|
|
}
|
|
for _, name := range c.cache.BackendsOrder {
|
|
if b, ok := c.cache.Backends[name]; ok {
|
|
snap.Backends = append(snap.Backends, b)
|
|
}
|
|
}
|
|
for _, name := range c.cache.HealthCheckOrder {
|
|
if h, ok := c.cache.HealthChecks[name]; ok {
|
|
snap.HealthChecks = append(snap.HealthChecks, h)
|
|
}
|
|
}
|
|
return snap
|
|
}
|
|
|
|
// refreshAll pulls a full fresh view of the maglevd's state into the cache.
|
|
// Called from the refreshLoop every 30s and immediately after a successful
|
|
// reconnect.
|
|
func (c *maglevClient) refreshAll(ctx context.Context) error {
|
|
rctx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
|
defer cancel()
|
|
|
|
frontends := map[string]*FrontendSnapshot{}
|
|
fl, err := c.api.ListFrontends(rctx, &grpcapi.ListFrontendsRequest{})
|
|
if err != nil {
|
|
return fmt.Errorf("list frontends: %w", err)
|
|
}
|
|
frontendsOrder := append([]string(nil), fl.GetFrontendNames()...)
|
|
for _, name := range frontendsOrder {
|
|
fi, err := c.api.GetFrontend(rctx, &grpcapi.GetFrontendRequest{Name: name})
|
|
if err != nil {
|
|
return fmt.Errorf("get frontend %s: %w", name, err)
|
|
}
|
|
frontends[name] = frontendFromProto(fi)
|
|
}
|
|
|
|
backends := map[string]*BackendSnapshot{}
|
|
bl, err := c.api.ListBackends(rctx, &grpcapi.ListBackendsRequest{})
|
|
if err != nil {
|
|
return fmt.Errorf("list backends: %w", err)
|
|
}
|
|
backendsOrder := append([]string(nil), bl.GetBackendNames()...)
|
|
for _, name := range backendsOrder {
|
|
bi, err := c.api.GetBackend(rctx, &grpcapi.GetBackendRequest{Name: name})
|
|
if err != nil {
|
|
return fmt.Errorf("get backend %s: %w", name, err)
|
|
}
|
|
backends[name] = backendFromProto(bi)
|
|
}
|
|
|
|
healthchecks := map[string]*HealthCheckSnapshot{}
|
|
hl, err := c.api.ListHealthChecks(rctx, &grpcapi.ListHealthChecksRequest{})
|
|
if err != nil {
|
|
return fmt.Errorf("list healthchecks: %w", err)
|
|
}
|
|
healthCheckOrder := append([]string(nil), hl.GetNames()...)
|
|
for _, name := range healthCheckOrder {
|
|
hi, err := c.api.GetHealthCheck(rctx, &grpcapi.GetHealthCheckRequest{Name: name})
|
|
if err != nil {
|
|
return fmt.Errorf("get healthcheck %s: %w", name, err)
|
|
}
|
|
healthchecks[name] = healthCheckFromProto(hi)
|
|
}
|
|
|
|
var vppInfo *VPPInfoSnapshot
|
|
if vi, err := c.api.GetVPPInfo(rctx, &grpcapi.GetVPPInfoRequest{}); err == nil {
|
|
vppInfo = &VPPInfoSnapshot{
|
|
Version: vi.GetVersion(),
|
|
BuildDate: vi.GetBuildDate(),
|
|
PID: vi.GetPid(),
|
|
BoottimeNs: vi.GetBoottimeNs(),
|
|
ConnecttimeNs: vi.GetConnecttimeNs(),
|
|
}
|
|
}
|
|
|
|
c.mu.Lock()
|
|
c.cache.Frontends = frontends
|
|
c.cache.FrontendsOrder = frontendsOrder
|
|
c.cache.Backends = backends
|
|
c.cache.BackendsOrder = backendsOrder
|
|
c.cache.HealthChecks = healthchecks
|
|
c.cache.HealthCheckOrder = healthCheckOrder
|
|
c.cache.VPPInfo = vppInfo
|
|
c.cache.LastRefresh = time.Now()
|
|
c.mu.Unlock()
|
|
return nil
|
|
}
|
|
|
|
// watchLoop subscribes to WatchEvents and feeds the broker until the context
|
|
// is cancelled. Reconnects with exponential backoff on stream errors.
|
|
func (c *maglevClient) watchLoop(ctx context.Context) {
|
|
backoff := time.Second
|
|
maxBackoff := 30 * time.Second
|
|
for {
|
|
if ctx.Err() != nil {
|
|
return
|
|
}
|
|
if err := c.watchOnce(ctx); err != nil {
|
|
if ctx.Err() != nil {
|
|
return
|
|
}
|
|
slog.Warn("watch-disconnected", "maglevd", c.name, "err", err)
|
|
c.setConnected(false, err.Error())
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-time.After(backoff):
|
|
}
|
|
backoff *= 2
|
|
if backoff > maxBackoff {
|
|
backoff = maxBackoff
|
|
}
|
|
continue
|
|
}
|
|
backoff = time.Second
|
|
}
|
|
}
|
|
|
|
func (c *maglevClient) watchOnce(ctx context.Context) error {
|
|
logFlag := true
|
|
backendFlag := true
|
|
frontendFlag := true
|
|
req := &grpcapi.WatchRequest{
|
|
Log: &logFlag,
|
|
LogLevel: "debug",
|
|
Backend: &backendFlag,
|
|
Frontend: &frontendFlag,
|
|
}
|
|
stream, err := c.api.WatchEvents(ctx, req)
|
|
if err != nil {
|
|
return fmt.Errorf("open stream: %w", err)
|
|
}
|
|
// Successful subscribe: mark connected and pull a fresh snapshot so
|
|
// the REST cache is immediately ground-truth accurate. WatchEvents
|
|
// itself replays current state as synthetic from==to events, which
|
|
// will also update the cache as they arrive.
|
|
c.setConnected(true, "")
|
|
if err := c.refreshAll(ctx); err != nil {
|
|
slog.Warn("refresh-after-watch", "maglevd", c.name, "err", err)
|
|
}
|
|
for {
|
|
ev, err := stream.Recv()
|
|
if err != nil {
|
|
if errors.Is(err, io.EOF) || ctx.Err() != nil {
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
c.handleEvent(ev)
|
|
}
|
|
}
|
|
|
|
// handleEvent applies an incoming gRPC event to the local cache and
|
|
// publishes a corresponding BrowserEvent on the broker.
|
|
func (c *maglevClient) handleEvent(ev *grpcapi.Event) {
|
|
switch body := ev.GetEvent().(type) {
|
|
case *grpcapi.Event_Log:
|
|
le := body.Log
|
|
if le == nil {
|
|
return
|
|
}
|
|
attrs := make(map[string]string, len(le.GetAttrs()))
|
|
for _, a := range le.GetAttrs() {
|
|
attrs[a.GetKey()] = a.GetValue()
|
|
}
|
|
payload, _ := json.Marshal(LogEventPayload{
|
|
Level: le.GetLevel(),
|
|
Msg: le.GetMsg(),
|
|
Attrs: attrs,
|
|
})
|
|
c.broker.Publish(BrowserEvent{
|
|
Maglevd: c.name,
|
|
Type: "log",
|
|
AtUnixNs: le.GetAtUnixNs(),
|
|
Payload: payload,
|
|
})
|
|
|
|
case *grpcapi.Event_Backend:
|
|
be := body.Backend
|
|
if be == nil || be.GetTransition() == nil {
|
|
return
|
|
}
|
|
tr := transitionFromProto(be.GetTransition())
|
|
// maglevd replays current state on WatchEvents subscribe as a
|
|
// synthetic event with from==to and at_unix_ns=0 (see
|
|
// internal/grpcapi/server.go). It is not a real transition — the
|
|
// in-process cache is already correct from refreshAll, so don't
|
|
// touch LastTransition (which would clobber it with at=0 and
|
|
// render as "55 years ago" in the browser) and don't forward to
|
|
// the broker.
|
|
if tr.From == tr.To {
|
|
return
|
|
}
|
|
c.applyBackendTransition(be.GetBackendName(), tr)
|
|
payload, _ := json.Marshal(BackendEventPayload{
|
|
Backend: be.GetBackendName(),
|
|
Transition: *tr,
|
|
})
|
|
c.broker.Publish(BrowserEvent{
|
|
Maglevd: c.name,
|
|
Type: "backend",
|
|
AtUnixNs: tr.AtUnixNs,
|
|
Payload: payload,
|
|
})
|
|
|
|
case *grpcapi.Event_Frontend:
|
|
fe := body.Frontend
|
|
if fe == nil || fe.GetTransition() == nil {
|
|
return
|
|
}
|
|
tr := transitionFromProto(fe.GetTransition())
|
|
if tr.From == tr.To {
|
|
return
|
|
}
|
|
payload, _ := json.Marshal(FrontendEventPayload{
|
|
Frontend: fe.GetFrontendName(),
|
|
Transition: *tr,
|
|
})
|
|
c.broker.Publish(BrowserEvent{
|
|
Maglevd: c.name,
|
|
Type: "frontend",
|
|
AtUnixNs: tr.AtUnixNs,
|
|
Payload: payload,
|
|
})
|
|
}
|
|
}
|
|
|
|
func (c *maglevClient) applyBackendTransition(name string, tr *TransitionRecord) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
b, ok := c.cache.Backends[name]
|
|
if !ok {
|
|
b = &BackendSnapshot{Name: name}
|
|
c.cache.Backends[name] = b
|
|
c.cache.BackendsOrder = append(c.cache.BackendsOrder, name)
|
|
}
|
|
b.State = tr.To
|
|
b.LastTransition = tr
|
|
b.Transitions = append(b.Transitions, tr)
|
|
// Cap history to the most recent 20 entries to mirror what maglevd
|
|
// returns from GetBackend.
|
|
if len(b.Transitions) > 20 {
|
|
b.Transitions = b.Transitions[len(b.Transitions)-20:]
|
|
}
|
|
}
|
|
|
|
// refreshLoop pulls a fresh snapshot every 30s to catch anything the live
|
|
// event stream may have missed (e.g. during a brief gRPC reconnect).
|
|
func (c *maglevClient) refreshLoop(ctx context.Context) {
|
|
t := time.NewTicker(30 * time.Second)
|
|
defer t.Stop()
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-t.C:
|
|
if err := c.refreshAll(ctx); err != nil {
|
|
slog.Debug("refresh-all", "maglevd", c.name, "err", err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// healthLoop issues a cheap GetVPPInfo every 5s to surface connection drops
|
|
// quickly. Errors flip the connection indicator; recoveries trigger a
|
|
// refreshAll so the cache catches up.
|
|
func (c *maglevClient) healthLoop(ctx context.Context) {
|
|
t := time.NewTicker(5 * time.Second)
|
|
defer t.Stop()
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-t.C:
|
|
hctx, cancel := context.WithTimeout(ctx, 2*time.Second)
|
|
_, err := c.api.GetVPPInfo(hctx, &grpcapi.GetVPPInfoRequest{})
|
|
cancel()
|
|
if err != nil {
|
|
c.setConnected(false, err.Error())
|
|
} else {
|
|
c.setConnected(true, "")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// ---- proto → JSON helpers --------------------------------------------------
|
|
|
|
func frontendFromProto(fi *grpcapi.FrontendInfo) *FrontendSnapshot {
|
|
out := &FrontendSnapshot{
|
|
Name: fi.GetName(),
|
|
Address: fi.GetAddress(),
|
|
Protocol: fi.GetProtocol(),
|
|
Port: fi.GetPort(),
|
|
Description: fi.GetDescription(),
|
|
SrcIPSticky: fi.GetSrcIpSticky(),
|
|
}
|
|
for _, p := range fi.GetPools() {
|
|
ps := &PoolSnapshot{Name: p.GetName()}
|
|
for _, pb := range p.GetBackends() {
|
|
ps.Backends = append(ps.Backends, &PoolBackendSnapshot{
|
|
Name: pb.GetName(),
|
|
Weight: pb.GetWeight(),
|
|
EffectiveWeight: pb.GetEffectiveWeight(),
|
|
})
|
|
}
|
|
out.Pools = append(out.Pools, ps)
|
|
}
|
|
return out
|
|
}
|
|
|
|
func backendFromProto(bi *grpcapi.BackendInfo) *BackendSnapshot {
|
|
out := &BackendSnapshot{
|
|
Name: bi.GetName(),
|
|
Address: bi.GetAddress(),
|
|
State: bi.GetState(),
|
|
Enabled: bi.GetEnabled(),
|
|
HealthCheck: bi.GetHealthcheck(),
|
|
}
|
|
for _, t := range bi.GetTransitions() {
|
|
out.Transitions = append(out.Transitions, transitionFromProto(t))
|
|
}
|
|
if n := len(out.Transitions); n > 0 {
|
|
out.LastTransition = out.Transitions[n-1]
|
|
}
|
|
return out
|
|
}
|
|
|
|
func transitionFromProto(t *grpcapi.TransitionRecord) *TransitionRecord {
|
|
return &TransitionRecord{
|
|
From: t.GetFrom(),
|
|
To: t.GetTo(),
|
|
AtUnixNs: t.GetAtUnixNs(),
|
|
}
|
|
}
|
|
|
|
func healthCheckFromProto(h *grpcapi.HealthCheckInfo) *HealthCheckSnapshot {
|
|
return &HealthCheckSnapshot{
|
|
Name: h.GetName(),
|
|
Type: h.GetType(),
|
|
Port: h.GetPort(),
|
|
IntervalNs: h.GetIntervalNs(),
|
|
FastIntervalNs: h.GetFastIntervalNs(),
|
|
DownIntervalNs: h.GetDownIntervalNs(),
|
|
TimeoutNs: h.GetTimeoutNs(),
|
|
Rise: h.GetRise(),
|
|
Fall: h.GetFall(),
|
|
}
|
|
}
|