// Copyright (c) 2026, Pim van Pelt

package main

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"net"
	"strings"
	"sync"
	"time"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	"git.ipng.ch/ipng/vpp-maglev/internal/grpcapi"
)

// maglevClient is a per-maglevd gRPC client plus cache and background loops.
type maglevClient struct {
	name    string               // short display name, derived from address by hostnameOf
	address string               // dial target, e.g. "lb-ams.internal:9090"
	conn    *grpc.ClientConn     // underlying gRPC connection
	api     grpcapi.MaglevClient // generated RPC stub over conn
	broker  *Broker              // fan-out for BrowserEvents to connected UIs

	mu        sync.RWMutex // guards connected, lastErr and cache
	connected bool         // last observed connection state
	lastErr   string       // last connection error, "" when healthy
	cache     cachedState  // REST-facing snapshot of this maglevd's state
}

// cachedState is the per-maglevd snapshot served via the REST handlers.
// Frontends / Backends / HealthChecks are maps for O(1) lookup from the
// event path, and the *Order slices preserve the order returned by the
// corresponding List* RPC so the UI renders in a stable order across
// reloads instead of Go map iteration's randomised order.
type cachedState struct {
	Frontends        map[string]*FrontendSnapshot
	FrontendsOrder   []string
	Backends         map[string]*BackendSnapshot
	BackendsOrder    []string
	HealthChecks     map[string]*HealthCheckSnapshot
	HealthCheckOrder []string
	VPPInfo          *VPPInfoSnapshot
	LastRefresh      time.Time // wall-clock time of the last successful refreshAll
}

// newMaglevClient creates a gRPC client for the given address (no TLS) and
// returns a maglevClient whose cache maps are initialised but empty. The
// caller is expected to invoke Start to launch the background loops and
// Close when done.
func newMaglevClient(address string, broker *Broker) (*maglevClient, error) {
	conn, err := grpc.NewClient(address, grpc.WithTransportCredentials(insecure.NewCredentials()))
	if err != nil {
		return nil, err
	}
	return &maglevClient{
		name:    hostnameOf(address),
		address: address,
		conn:    conn,
		api:     grpcapi.NewMaglevClient(conn),
		broker:  broker,
		cache: cachedState{
			Frontends:    map[string]*FrontendSnapshot{},
			Backends:     map[string]*BackendSnapshot{},
			HealthChecks: map[string]*HealthCheckSnapshot{},
		},
	}, nil
}

// hostnameOf strips the port from an address and returns a short display
// name. For DNS names we take the first label ("lb-ams.internal:9090" →
// "lb-ams"). For IP literals we return the full address so we don't
// accidentally truncate "127.0.0.1" to "127".
func hostnameOf(address string) string { host := address if h, _, err := net.SplitHostPort(address); err == nil { host = h } host = strings.TrimPrefix(strings.TrimSuffix(host, "]"), "[") if net.ParseIP(host) != nil { return host } if i := strings.Index(host, "."); i >= 0 { return host[:i] } return host } func (c *maglevClient) Close() { _ = c.conn.Close() } func (c *maglevClient) Start(ctx context.Context) { go c.watchLoop(ctx) go c.refreshLoop(ctx) go c.healthLoop(ctx) } func (c *maglevClient) setConnected(ok bool, errMsg string) { c.mu.Lock() prev := c.connected c.connected = ok c.lastErr = errMsg c.mu.Unlock() if prev != ok { payload, _ := json.Marshal(MaglevdStatusPayload{Connected: ok, LastError: errMsg}) c.broker.Publish(BrowserEvent{ Maglevd: c.name, Type: "maglevd-status", AtUnixNs: time.Now().UnixNano(), Payload: payload, }) } } // Info returns the current connection status for this maglevd. func (c *maglevClient) Info() MaglevdInfo { c.mu.RLock() defer c.mu.RUnlock() return MaglevdInfo{ Name: c.name, Address: c.address, Connected: c.connected, LastError: c.lastErr, } } // Snapshot returns a deep-ish copy of the cached state for REST handlers. // Iteration order follows the corresponding *Order slice so the UI sees a // stable, RPC-defined order across reloads. 
func (c *maglevClient) Snapshot() *StateSnapshot { c.mu.RLock() defer c.mu.RUnlock() snap := &StateSnapshot{ Maglevd: MaglevdInfo{ Name: c.name, Address: c.address, Connected: c.connected, LastError: c.lastErr, }, Frontends: make([]*FrontendSnapshot, 0, len(c.cache.FrontendsOrder)), Backends: make([]*BackendSnapshot, 0, len(c.cache.BackendsOrder)), HealthChecks: make([]*HealthCheckSnapshot, 0, len(c.cache.HealthCheckOrder)), VPPInfo: c.cache.VPPInfo, } for _, name := range c.cache.FrontendsOrder { if f, ok := c.cache.Frontends[name]; ok { snap.Frontends = append(snap.Frontends, f) } } for _, name := range c.cache.BackendsOrder { if b, ok := c.cache.Backends[name]; ok { snap.Backends = append(snap.Backends, b) } } for _, name := range c.cache.HealthCheckOrder { if h, ok := c.cache.HealthChecks[name]; ok { snap.HealthChecks = append(snap.HealthChecks, h) } } return snap } // refreshAll pulls a full fresh view of the maglevd's state into the cache. // Called from the refreshLoop every 30s and immediately after a successful // reconnect. func (c *maglevClient) refreshAll(ctx context.Context) error { rctx, cancel := context.WithTimeout(ctx, 10*time.Second) defer cancel() frontends := map[string]*FrontendSnapshot{} fl, err := c.api.ListFrontends(rctx, &grpcapi.ListFrontendsRequest{}) if err != nil { return fmt.Errorf("list frontends: %w", err) } frontendsOrder := append([]string(nil), fl.GetFrontendNames()...) for _, name := range frontendsOrder { fi, err := c.api.GetFrontend(rctx, &grpcapi.GetFrontendRequest{Name: name}) if err != nil { return fmt.Errorf("get frontend %s: %w", name, err) } frontends[name] = frontendFromProto(fi) } backends := map[string]*BackendSnapshot{} bl, err := c.api.ListBackends(rctx, &grpcapi.ListBackendsRequest{}) if err != nil { return fmt.Errorf("list backends: %w", err) } backendsOrder := append([]string(nil), bl.GetBackendNames()...) 
for _, name := range backendsOrder { bi, err := c.api.GetBackend(rctx, &grpcapi.GetBackendRequest{Name: name}) if err != nil { return fmt.Errorf("get backend %s: %w", name, err) } backends[name] = backendFromProto(bi) } healthchecks := map[string]*HealthCheckSnapshot{} hl, err := c.api.ListHealthChecks(rctx, &grpcapi.ListHealthChecksRequest{}) if err != nil { return fmt.Errorf("list healthchecks: %w", err) } healthCheckOrder := append([]string(nil), hl.GetNames()...) for _, name := range healthCheckOrder { hi, err := c.api.GetHealthCheck(rctx, &grpcapi.GetHealthCheckRequest{Name: name}) if err != nil { return fmt.Errorf("get healthcheck %s: %w", name, err) } healthchecks[name] = healthCheckFromProto(hi) } var vppInfo *VPPInfoSnapshot if vi, err := c.api.GetVPPInfo(rctx, &grpcapi.GetVPPInfoRequest{}); err == nil { vppInfo = &VPPInfoSnapshot{ Version: vi.GetVersion(), BuildDate: vi.GetBuildDate(), PID: vi.GetPid(), BoottimeNs: vi.GetBoottimeNs(), ConnecttimeNs: vi.GetConnecttimeNs(), } } c.mu.Lock() c.cache.Frontends = frontends c.cache.FrontendsOrder = frontendsOrder c.cache.Backends = backends c.cache.BackendsOrder = backendsOrder c.cache.HealthChecks = healthchecks c.cache.HealthCheckOrder = healthCheckOrder c.cache.VPPInfo = vppInfo c.cache.LastRefresh = time.Now() c.mu.Unlock() return nil } // watchLoop subscribes to WatchEvents and feeds the broker until the context // is cancelled. Reconnects with exponential backoff on stream errors. 
func (c *maglevClient) watchLoop(ctx context.Context) { backoff := time.Second maxBackoff := 30 * time.Second for { if ctx.Err() != nil { return } if err := c.watchOnce(ctx); err != nil { if ctx.Err() != nil { return } slog.Warn("watch-disconnected", "maglevd", c.name, "err", err) c.setConnected(false, err.Error()) select { case <-ctx.Done(): return case <-time.After(backoff): } backoff *= 2 if backoff > maxBackoff { backoff = maxBackoff } continue } backoff = time.Second } } func (c *maglevClient) watchOnce(ctx context.Context) error { logFlag := true backendFlag := true frontendFlag := true req := &grpcapi.WatchRequest{ Log: &logFlag, LogLevel: "debug", Backend: &backendFlag, Frontend: &frontendFlag, } stream, err := c.api.WatchEvents(ctx, req) if err != nil { return fmt.Errorf("open stream: %w", err) } // Successful subscribe: mark connected and pull a fresh snapshot so // the REST cache is immediately ground-truth accurate. WatchEvents // itself replays current state as synthetic from==to events, which // will also update the cache as they arrive. c.setConnected(true, "") if err := c.refreshAll(ctx); err != nil { slog.Warn("refresh-after-watch", "maglevd", c.name, "err", err) } for { ev, err := stream.Recv() if err != nil { if errors.Is(err, io.EOF) || ctx.Err() != nil { return nil } return err } c.handleEvent(ev) } } // handleEvent applies an incoming gRPC event to the local cache and // publishes a corresponding BrowserEvent on the broker. 
func (c *maglevClient) handleEvent(ev *grpcapi.Event) { switch body := ev.GetEvent().(type) { case *grpcapi.Event_Log: le := body.Log if le == nil { return } attrs := make(map[string]string, len(le.GetAttrs())) for _, a := range le.GetAttrs() { attrs[a.GetKey()] = a.GetValue() } payload, _ := json.Marshal(LogEventPayload{ Level: le.GetLevel(), Msg: le.GetMsg(), Attrs: attrs, }) c.broker.Publish(BrowserEvent{ Maglevd: c.name, Type: "log", AtUnixNs: le.GetAtUnixNs(), Payload: payload, }) case *grpcapi.Event_Backend: be := body.Backend if be == nil || be.GetTransition() == nil { return } tr := transitionFromProto(be.GetTransition()) // maglevd replays current state on WatchEvents subscribe as a // synthetic event with from==to and at_unix_ns=0 (see // internal/grpcapi/server.go). It is not a real transition — the // in-process cache is already correct from refreshAll, so don't // touch LastTransition (which would clobber it with at=0 and // render as "55 years ago" in the browser) and don't forward to // the broker. 
if tr.From == tr.To { return } c.applyBackendTransition(be.GetBackendName(), tr) payload, _ := json.Marshal(BackendEventPayload{ Backend: be.GetBackendName(), Transition: *tr, }) c.broker.Publish(BrowserEvent{ Maglevd: c.name, Type: "backend", AtUnixNs: tr.AtUnixNs, Payload: payload, }) case *grpcapi.Event_Frontend: fe := body.Frontend if fe == nil || fe.GetTransition() == nil { return } tr := transitionFromProto(fe.GetTransition()) if tr.From == tr.To { return } payload, _ := json.Marshal(FrontendEventPayload{ Frontend: fe.GetFrontendName(), Transition: *tr, }) c.broker.Publish(BrowserEvent{ Maglevd: c.name, Type: "frontend", AtUnixNs: tr.AtUnixNs, Payload: payload, }) } } func (c *maglevClient) applyBackendTransition(name string, tr *TransitionRecord) { c.mu.Lock() defer c.mu.Unlock() b, ok := c.cache.Backends[name] if !ok { b = &BackendSnapshot{Name: name} c.cache.Backends[name] = b c.cache.BackendsOrder = append(c.cache.BackendsOrder, name) } b.State = tr.To b.LastTransition = tr b.Transitions = append(b.Transitions, tr) // Cap history to the most recent 20 entries to mirror what maglevd // returns from GetBackend. if len(b.Transitions) > 20 { b.Transitions = b.Transitions[len(b.Transitions)-20:] } } // refreshLoop pulls a fresh snapshot every 30s to catch anything the live // event stream may have missed (e.g. during a brief gRPC reconnect). func (c *maglevClient) refreshLoop(ctx context.Context) { t := time.NewTicker(30 * time.Second) defer t.Stop() for { select { case <-ctx.Done(): return case <-t.C: if err := c.refreshAll(ctx); err != nil { slog.Debug("refresh-all", "maglevd", c.name, "err", err) } } } } // healthLoop issues a cheap GetVPPInfo every 5s to surface connection drops // quickly. Errors flip the connection indicator; recoveries trigger a // refreshAll so the cache catches up. 
func (c *maglevClient) healthLoop(ctx context.Context) { t := time.NewTicker(5 * time.Second) defer t.Stop() for { select { case <-ctx.Done(): return case <-t.C: hctx, cancel := context.WithTimeout(ctx, 2*time.Second) _, err := c.api.GetVPPInfo(hctx, &grpcapi.GetVPPInfoRequest{}) cancel() if err != nil { c.setConnected(false, err.Error()) } else { c.setConnected(true, "") } } } } // ---- proto → JSON helpers -------------------------------------------------- func frontendFromProto(fi *grpcapi.FrontendInfo) *FrontendSnapshot { out := &FrontendSnapshot{ Name: fi.GetName(), Address: fi.GetAddress(), Protocol: fi.GetProtocol(), Port: fi.GetPort(), Description: fi.GetDescription(), SrcIPSticky: fi.GetSrcIpSticky(), } for _, p := range fi.GetPools() { ps := &PoolSnapshot{Name: p.GetName()} for _, pb := range p.GetBackends() { ps.Backends = append(ps.Backends, &PoolBackendSnapshot{ Name: pb.GetName(), Weight: pb.GetWeight(), EffectiveWeight: pb.GetEffectiveWeight(), }) } out.Pools = append(out.Pools, ps) } return out } func backendFromProto(bi *grpcapi.BackendInfo) *BackendSnapshot { out := &BackendSnapshot{ Name: bi.GetName(), Address: bi.GetAddress(), State: bi.GetState(), Enabled: bi.GetEnabled(), HealthCheck: bi.GetHealthcheck(), } for _, t := range bi.GetTransitions() { out.Transitions = append(out.Transitions, transitionFromProto(t)) } if n := len(out.Transitions); n > 0 { out.LastTransition = out.Transitions[n-1] } return out } func transitionFromProto(t *grpcapi.TransitionRecord) *TransitionRecord { return &TransitionRecord{ From: t.GetFrom(), To: t.GetTo(), AtUnixNs: t.GetAtUnixNs(), } } func healthCheckFromProto(h *grpcapi.HealthCheckInfo) *HealthCheckSnapshot { return &HealthCheckSnapshot{ Name: h.GetName(), Type: h.GetType(), Port: h.GetPort(), IntervalNs: h.GetIntervalNs(), FastIntervalNs: h.GetFastIntervalNs(), DownIntervalNs: h.GetDownIntervalNs(), TimeoutNs: h.GetTimeoutNs(), Rise: h.GetRise(), Fall: h.GetFall(), } }