// Copyright (c) 2026, Pim van Pelt package vpp import ( "context" "fmt" "log/slog" "sort" "time" "go.fd.io/govpp/adapter" "git.ipng.ch/ipng/vpp-maglev/internal/metrics" ) // lbStatsInterval is how often lbStatsLoop scrapes per-VIP and per-backend // counters from the VPP stats segment. Hard-coded for now; the scrape // feeds both slog.Debug lines and the Prometheus collector. const lbStatsInterval = 5 * time.Second // LB VIP counter names as they appear in the VPP stats segment. These // come from lb_foreach_vip_counter in src/plugins/lb/lb.h — each entry is // registered with only .name set, so the stats segment exposes them at // the top level (spaces and all). Replace if the VPP plugin renames them. const ( lbStatNextPacket = "/packet from existing sessions" lbStatFirstPacket = "/first session packet" lbStatUntrackedPkt = "/untracked packet" lbStatNoServer = "/no server configured" ) // lbStatPatterns is the full list of anchored regexes passed to DumpStats // for one scrape cycle: the four LB-plugin SimpleCounters plus the FIB // CombinedCounter for per-route packet+byte totals. Doing it in a single // DumpStats avoids walking the stats segment twice. var lbStatPatterns = []string{ `^/packet from existing sessions$`, `^/first session packet$`, `^/untracked packet$`, `^/no server configured$`, `^/net/route/to$`, } // lbStatsLoop periodically scrapes the LB plugin's per-VIP counters and // the FIB's /net/route/to combined counter for both VIPs and backends, // publishes the results to the atomic snapshots read by Prometheus, and // emits one slog.Debug line per VIP and per backend. Exits when ctx is // cancelled. func (c *Client) lbStatsLoop(ctx context.Context) { ticker := time.NewTicker(lbStatsInterval) defer ticker.Stop() for { select { case <-ctx.Done(): return case <-ticker.C: } if err := c.scrapeLBStats(); err != nil { slog.Debug("vpp-lb-stats-error", "err", err) } } } // scrapeLBStats runs one full scrape cycle: discover VIPs via cli_inband, // look up FIB stats_indices for every VIP and every backend via // ip_route_lookup, dump all five stats-segment paths in one DumpStats // call, and reduce the counters into the two published snapshots. func (c *Client) scrapeLBStats() error { if !c.IsConnected() { return nil } ch, err := c.apiChannel() if err != nil { return err } defer ch.Close() snap, err := queryLBVIPSnapshot(ch) if err != nil { return fmt.Errorf("query vip snapshot: %w", err) } // Resolve FIB stats_indices for every VIP. The LB plugin installs a // host-prefix FIB entry per VIP (lb.c:990), so the exact=0 lookup // lands on it in the common case. See fibStatsIndex for the exact=0 // caveat when a covering route shadows the host prefix. vipStatsIdx := make(map[vipKey]uint32, len(snap)) for k := range snap { addr, _, err := vipKeyToIP(k) if err != nil { continue } idx, err := fibStatsIndex(ch, addr) if err != nil { slog.Debug("vpp-vip-route-lookup-failed", "prefix", k.prefix, "err", err) continue } vipStatsIdx[k] = idx } // Resolve FIB stats_indices for every backend in the running config. type backendLookup struct { name, addr string index uint32 } var backends []backendLookup if src := c.getStateSource(); src != nil { if cfg := src.Config(); cfg != nil { names := make([]string, 0, len(cfg.Backends)) for name := range cfg.Backends { names = append(names, name) } sort.Strings(names) // stable snapshot order for _, name := range names { b := cfg.Backends[name] if b.Address == nil { continue } idx, err := fibStatsIndex(ch, b.Address) if err != nil { slog.Debug("vpp-backend-route-lookup-failed", "backend", name, "address", b.Address.String(), "err", err) continue } backends = append(backends, backendLookup{ name: name, addr: b.Address.String(), index: idx, }) } } } c.mu.Lock() sc := c.statsClient c.mu.Unlock() if sc == nil { return nil } entries, err := sc.DumpStats(lbStatPatterns...) if err != nil { return fmt.Errorf("dump stats: %w", err) } nextPkt := findSimpleCounter(entries, lbStatNextPacket) firstPkt := findSimpleCounter(entries, lbStatFirstPacket) untracked := findSimpleCounter(entries, lbStatUntrackedPkt) noServer := findSimpleCounter(entries, lbStatNoServer) routeTo := findCombinedCounter(entries, routeToStatPath) // ---- VIP snapshot ---- vipOut := make([]metrics.VIPStatEntry, 0, len(snap)) for key, info := range snap { lbIdx := int(info.index) entry := metrics.VIPStatEntry{ Prefix: key.prefix, Protocol: protocolName(key.protocol), Port: key.port, NextPkt: reduceSimpleCounter(nextPkt, lbIdx), FirstPkt: reduceSimpleCounter(firstPkt, lbIdx), Untracked: reduceSimpleCounter(untracked, lbIdx), NoServer: reduceSimpleCounter(noServer, lbIdx), } if fibIdx, ok := vipStatsIdx[key]; ok { entry.Packets, entry.Bytes = reduceCombinedCounter(routeTo, int(fibIdx)) } vipOut = append(vipOut, entry) slog.Debug("vpp-vip-stats", "prefix", entry.Prefix, "protocol", entry.Protocol, "port", entry.Port, "next-packet", entry.NextPkt, "first-packet", entry.FirstPkt, "untracked", entry.Untracked, "no-server", entry.NoServer, "packets", entry.Packets, "bytes", entry.Bytes, ) } c.lbStatsSnap.Store(&vipOut) // ---- backend snapshot ---- backendOut := make([]metrics.BackendRouteStat, 0, len(backends)) for _, l := range backends { pkts, byts := reduceCombinedCounter(routeTo, int(l.index)) entry := metrics.BackendRouteStat{ Backend: l.name, Address: l.addr, Packets: pkts, Bytes: byts, } backendOut = append(backendOut, entry) slog.Debug("vpp-backend-route-stats", "backend", entry.Backend, "address", entry.Address, "packets", entry.Packets, "bytes", entry.Bytes, ) } c.backendRouteSnap.Store(&backendOut) return nil } // findSimpleCounter returns the SimpleCounterStat matching name, or nil if // not found. Stats segment names are byte slices, so we compare as string. func findSimpleCounter(entries []adapter.StatEntry, name string) adapter.SimpleCounterStat { for _, e := range entries { if string(e.Name) != name { continue } if s, ok := e.Data.(adapter.SimpleCounterStat); ok { return s } } return nil } // reduceSimpleCounter sums per-worker values at column i. It tolerates a // short per-worker vector (which can happen right after a VIP is added, // before a worker has observed it) by skipping out-of-range rows. func reduceSimpleCounter(s adapter.SimpleCounterStat, i int) uint64 { var sum uint64 for _, thread := range s { if i >= 0 && i < len(thread) { sum += uint64(thread[i]) } } return sum }