// Copyright (c) 2026, Pim van Pelt

package vpp

import (
	"context"
	"fmt"
	"log/slog"
	"time"

	"go.fd.io/govpp/adapter"

	"git.ipng.ch/ipng/vpp-maglev/internal/metrics"
)

// lbStatsInterval is how often lbStatsLoop scrapes the per-VIP counters
// from the VPP stats segment. Hard-coded for now; the scrape feeds both
// slog.Debug lines and the Prometheus collector.
const lbStatsInterval = 5 * time.Second

// LB VIP counter names as they appear in the VPP stats segment. These
// come from lb_foreach_vip_counter in src/plugins/lb/lb.h — the plugin
// passes them through vlib_simple_counter_main_t with only .name set
// (.stat_segment_name unset), so vlib_validate_simple_counter /
// vlib_stats_add_counter_vector register them under the name verbatim
// with no leading slash. Contrast with /net/route/to below, which IS
// registered with stat_segment_name="/net/route/to" in
// src/vnet/dpo/load_balance.c. Replace if the VPP plugin renames them
// or starts setting stat_segment_name.
const (
	lbStatNextPacket   = "packet from existing sessions"
	lbStatFirstPacket  = "first session packet"
	lbStatUntrackedPkt = "untracked packet"
	lbStatNoServer     = "no server configured"
)

// lbStatPatterns is the full list of anchored regexes passed to DumpStats
// for one scrape cycle: the four LB-plugin SimpleCounters plus the FIB
// CombinedCounter for per-VIP packet+byte totals. Doing it in a single
// DumpStats avoids walking the stats segment twice.
//
// The first four patterns spell out the same strings as the lbStat*
// constants above; keep the two lists in sync, since the constants are
// what the scrape uses to pick the counters back out of the dump.
var lbStatPatterns = []string{
	`^packet from existing sessions$`,
	`^first session packet$`,
	`^untracked packet$`,
	`^no server configured$`,
	`^/net/route/to$`,
}

// lbStatsLoop periodically scrapes the LB plugin's per-VIP counters and
// the FIB's /net/route/to combined counter for every VIP, publishes the
// result to the atomic snapshot read by Prometheus, and emits one
// slog.Debug line per VIP. The first scrape happens one lbStatsInterval
// after the loop starts. A failed scrape is logged at debug level and
// does not stop the loop. Exits when ctx is cancelled.
func (c *Client) lbStatsLoop(ctx context.Context) { ticker := time.NewTicker(lbStatsInterval) defer ticker.Stop() for { select { case <-ctx.Done(): return case <-ticker.C: } if err := c.scrapeLBStats(); err != nil { slog.Debug("vpp-lb-stats-error", "err", err) } } } // scrapeLBStats runs one full scrape cycle: discover VIPs via cli_inband, // look up the FIB stats_index for every VIP via ip_route_lookup, dump // all five stats-segment paths in one DumpStats call, and reduce the // counters into the per-VIP snapshot. // // NOTE: there is no per-backend counter here. The LB plugin's forwarding // node (src/plugins/lb/node.c) sets adj_index[VLIB_TX] to the AS's DPO // directly and enqueues into the lb*-gre* encap node, bypassing // ip6_lookup_inline / ip4_lookup_inline entirely. Since // lbm_to_counters is only incremented in those lookup paths (see // ip6_forward.h / ip4_forward.h), the backend's FIB-entry // load_balance stats_index never ticks for LB-forwarded traffic — // /net/route/to at that index would always report zero. Per-backend // packet/byte counters would need a new counter inside the LB plugin // itself (e.g. a vlib_combined_counter_main_t keyed by AS index // incremented in lb4-gre4 / lb6-gre6 / etc.). Until that lands // upstream we simply don't expose per-backend rates; the VIP-level // counters tell the dataplane story on the granularity VPP actually // provides today. func (c *Client) scrapeLBStats() error { if !c.IsConnected() { return nil } ch, err := c.apiChannel() if err != nil { return err } defer ch.Close() snap, err := queryLBVIPSnapshot(ch) if err != nil { return fmt.Errorf("query vip snapshot: %w", err) } // Resolve FIB stats_indices for every VIP. The LB plugin installs a // host-prefix FIB entry per VIP (lb.c:990), so the exact=0 lookup // lands on it in the common case. See fibStatsIndex for the exact=0 // caveat when a covering route shadows the host prefix. 
vipStatsIdx := make(map[vipKey]uint32, len(snap)) for k := range snap { addr, _, err := vipKeyToIP(k) if err != nil { continue } idx, err := fibStatsIndex(ch, addr) if err != nil { slog.Debug("vpp-vip-route-lookup-failed", "prefix", k.prefix, "err", err) continue } vipStatsIdx[k] = idx } c.mu.Lock() sc := c.statsClient c.mu.Unlock() if sc == nil { return nil } entries, err := sc.DumpStats(lbStatPatterns...) if err != nil { return fmt.Errorf("dump stats: %w", err) } nextPkt := findSimpleCounter(entries, lbStatNextPacket) firstPkt := findSimpleCounter(entries, lbStatFirstPacket) untracked := findSimpleCounter(entries, lbStatUntrackedPkt) noServer := findSimpleCounter(entries, lbStatNoServer) routeTo := findCombinedCounter(entries, routeToStatPath) // ---- VIP snapshot ---- vipOut := make([]metrics.VIPStatEntry, 0, len(snap)) for key, info := range snap { lbIdx := int(info.index) entry := metrics.VIPStatEntry{ Prefix: key.prefix, Protocol: protocolName(key.protocol), Port: key.port, NextPkt: reduceSimpleCounter(nextPkt, lbIdx), FirstPkt: reduceSimpleCounter(firstPkt, lbIdx), Untracked: reduceSimpleCounter(untracked, lbIdx), NoServer: reduceSimpleCounter(noServer, lbIdx), } if fibIdx, ok := vipStatsIdx[key]; ok { entry.Packets, entry.Bytes = reduceCombinedCounter(routeTo, int(fibIdx)) } vipOut = append(vipOut, entry) slog.Debug("vpp-vip-stats", "prefix", entry.Prefix, "protocol", entry.Protocol, "port", entry.Port, "next-packet", entry.NextPkt, "first-packet", entry.FirstPkt, "untracked", entry.Untracked, "no-server", entry.NoServer, "packets", entry.Packets, "bytes", entry.Bytes, ) } c.lbStatsSnap.Store(&vipOut) return nil } // findSimpleCounter returns the SimpleCounterStat matching name, or nil if // not found. Stats segment names are byte slices, so we compare as string. 
func findSimpleCounter(entries []adapter.StatEntry, name string) adapter.SimpleCounterStat { for _, e := range entries { if string(e.Name) != name { continue } if s, ok := e.Data.(adapter.SimpleCounterStat); ok { return s } } return nil } // reduceSimpleCounter sums per-worker values at column i. It tolerates a // short per-worker vector (which can happen right after a VIP is added, // before a worker has observed it) by skipping out-of-range rows. func reduceSimpleCounter(s adapter.SimpleCounterStat, i int) uint64 { var sum uint64 for _, thread := range s { if i >= 0 && i < len(thread) { sum += uint64(thread[i]) } } return sum }