// Copyright (c) 2026, Pim van Pelt

package vpp

import (
	"errors"
	"fmt"
	"log/slog"
	"net"
	"regexp"
	"strconv"
	"strings"

	"go.fd.io/govpp/binapi/vlib"

	"git.ipng.ch/ipng/vpp-maglev/internal/config"
	"git.ipng.ch/ipng/vpp-maglev/internal/health"
	"git.ipng.ch/ipng/vpp-maglev/internal/metrics"
	ip_types "git.ipng.ch/ipng/vpp-maglev/internal/vpp/binapi/ip_types"
	lb "git.ipng.ch/ipng/vpp-maglev/internal/vpp/binapi/lb"
	lb_types "git.ipng.ch/ipng/vpp-maglev/internal/vpp/binapi/lb_types"
)

// ErrFrontendNotFound is returned by SyncLBStateVIP when the caller asks for
// a frontend name that does not exist in the config. It is wrapped with the
// frontend name, so callers should test for it with errors.Is.
var ErrFrontendNotFound = errors.New("frontend not found in config")

// vipKey uniquely identifies a VPP LB VIP by its prefix, protocol, and port.
// All fields are comparable, so a vipKey is used directly as a map key when
// indexing the current and the desired VIP sets.
type vipKey struct {
	prefix   string // canonical CIDR form
	protocol uint8  // IP protocol number; 255 means "any" (see protocolFromConfig)
	port     uint16 // L4 port; all-port VIPs are recorded with port 0
}

// desiredVIP is the sync's view of one VIP derived from the maglev config.
type desiredVIP struct {
	Prefix      *net.IPNet
	Protocol    uint8 // 6=TCP, 17=UDP, 255=any
	Port        uint16
	SrcIPSticky bool                 // lb_add_del_vip_v2.src_ip_sticky
	ASes        map[string]desiredAS // keyed by AS IP string
}

// desiredAS is one application server to be installed under a VIP.
type desiredAS struct {
	Address net.IP
	Weight  uint8 // 0-100
	Flush   bool  // if true, drop existing flows when transitioning to weight 0
}

// syncStats counts changes made to the dataplane during a sync run. Each
// counter is incremented only after the corresponding VPP API call succeeded.
type syncStats struct {
	vipAdd   int // VIPs created
	vipDel   int // VIPs deleted
	asAdd    int // application servers added
	asDel    int // application servers removed
	asWeight int // weight updates on already-installed application servers
}

// recordSyncStats increments the Prometheus lbsync counters for one sync
// run. scope is "all" for SyncLBStateAll and "vip" for SyncLBStateVIP.
// Zero-valued kinds emit no increment (the counter stays at its previous
// value for that label set).
func recordSyncStats(scope string, st *syncStats) { if st.vipAdd > 0 { metrics.LBSyncTotal.WithLabelValues(scope, "vip_added").Add(float64(st.vipAdd)) } if st.vipDel > 0 { metrics.LBSyncTotal.WithLabelValues(scope, "vip_removed").Add(float64(st.vipDel)) } if st.asAdd > 0 { metrics.LBSyncTotal.WithLabelValues(scope, "as_added").Add(float64(st.asAdd)) } if st.asDel > 0 { metrics.LBSyncTotal.WithLabelValues(scope, "as_removed").Add(float64(st.asDel)) } if st.asWeight > 0 { metrics.LBSyncTotal.WithLabelValues(scope, "as_weight_updated").Add(float64(st.asWeight)) } } // SyncLBStateAll reconciles the full VPP load-balancer state with the given // config. For every frontend in cfg: // - if the VIP does not exist in VPP, create it; // - for every pool backend, add the application server if missing, or // update its weight if different. // // VIPs and ASes present in VPP but absent from the config are removed. // Returns an error if any VPP API call fails. func (c *Client) SyncLBStateAll(cfg *config.Config) error { if !c.IsConnected() { return errNotConnected } src := c.getStateSource() if src == nil { return fmt.Errorf("no state source configured") } cur, err := c.GetLBStateAll() if err != nil { return fmt.Errorf("read VPP LB state: %w", err) } desired := desiredFromConfig(cfg, src) ch, err := c.apiChannel() if err != nil { return err } defer ch.Close() slog.Info("vpp-lbsync-start", "scope", "all", "vips-desired", len(desired), "vips-current", len(cur.VIPs)) // Index both sides by (prefix, protocol, port). 
curByKey := make(map[vipKey]LBVIP, len(cur.VIPs)) for _, v := range cur.VIPs { curByKey[makeVIPKey(v.Prefix, v.Protocol, v.Port)] = v } desByKey := make(map[vipKey]desiredVIP, len(desired)) for _, d := range desired { desByKey[makeVIPKey(d.Prefix, d.Protocol, d.Port)] = d } var st syncStats // ---- pass 1: remove VIPs that are in VPP but not in config ---- for k, v := range curByKey { if _, keep := desByKey[k]; keep { continue } if err := removeVIP(ch, v, &st); err != nil { return err } } // ---- pass 2: add/update VIPs that are in config ---- for k, d := range desByKey { cur, existing := curByKey[k] var curPtr *LBVIP var curSticky bool if existing { curPtr = &cur curSticky = cur.SrcIPSticky } if err := reconcileVIP(ch, d, curPtr, curSticky, &st); err != nil { return err } } recordSyncStats("all", &st) slog.Info("vpp-lbsync-done", "scope", "all", "vip-added", st.vipAdd, "vip-removed", st.vipDel, "as-added", st.asAdd, "as-removed", st.asDel, "as-weight-updated", st.asWeight) return nil } // SyncLBStateVIP reconciles a single VIP (identified by frontend name) with // the given config. Unlike SyncLBStateAll, it never removes VIPs: if the // frontend is missing from cfg, SyncLBStateVIP returns ErrFrontendNotFound. // This is the right tool for targeted updates on a busy load-balancer with // many VIPs — only one VIP is read from VPP and only its ASes are modified. 
func (c *Client) SyncLBStateVIP(cfg *config.Config, feName string) error { if !c.IsConnected() { return errNotConnected } src := c.getStateSource() if src == nil { return fmt.Errorf("no state source configured") } fe, ok := cfg.Frontends[feName] if !ok { return fmt.Errorf("%q: %w", feName, ErrFrontendNotFound) } d := desiredFromFrontend(cfg, fe, src) cur, err := c.GetLBStateVIP(d.Prefix, d.Protocol, d.Port) if err != nil { return fmt.Errorf("read VPP VIP state: %w", err) } ch, err := c.apiChannel() if err != nil { return err } defer ch.Close() slog.Info("vpp-lbsync-start", "scope", "vip", "frontend", feName, "prefix", d.Prefix.String(), "protocol", protocolName(d.Protocol), "port", d.Port) var curSticky bool if cur != nil { curSticky = cur.SrcIPSticky } var st syncStats if err := reconcileVIP(ch, d, cur, curSticky, &st); err != nil { return err } recordSyncStats("vip", &st) slog.Info("vpp-lbsync-done", "scope", "vip", "frontend", feName, "vip-added", st.vipAdd, "as-added", st.asAdd, "as-removed", st.asDel, "as-weight-updated", st.asWeight) return nil } // reconcileVIP brings one VIP's state in VPP into alignment with the desired // state. If cur is nil the VIP is added from scratch; otherwise ASes are // added, removed, and reweighted individually. Stats are accumulated into st. // // curSticky is the src_ip_sticky flag VPP currently has programmed for this // VIP, as scraped by queryLBSticky. Callers only consult it when cur != nil, // and the scrape reads the same live lb_main.vips pool as lb_vip_dump, so a // matching entry is always present. When the flag differs from the desired // value, the VIP is torn down (ASes del+flushed, VIP deleted) and recreated // — VPP has no API to mutate src_ip_sticky on an existing VIP. 
func reconcileVIP(ch *loggedChannel, d desiredVIP, cur *LBVIP, curSticky bool, st *syncStats) error { if cur == nil { if err := addVIP(ch, d); err != nil { return err } st.vipAdd++ for _, as := range d.ASes { if err := addAS(ch, d.Prefix, d.Protocol, d.Port, as); err != nil { return err } st.asAdd++ } return nil } if curSticky != d.SrcIPSticky { slog.Info("vpp-lbsync-vip-recreate", "prefix", d.Prefix.String(), "protocol", protocolName(d.Protocol), "port", d.Port, "reason", "src-ip-sticky-changed", "from", curSticky, "to", d.SrcIPSticky) if err := removeVIP(ch, *cur, st); err != nil { return err } if err := addVIP(ch, d); err != nil { return err } st.vipAdd++ for _, as := range d.ASes { if err := addAS(ch, d.Prefix, d.Protocol, d.Port, as); err != nil { return err } st.asAdd++ } return nil } // VIP exists in both — reconcile ASes. curASes := make(map[string]LBAS, len(cur.ASes)) for _, a := range cur.ASes { curASes[a.Address.String()] = a } // Remove ASes that are in VPP but not desired. for addr, a := range curASes { if _, keep := d.ASes[addr]; keep { continue } if err := delAS(ch, cur.Prefix, cur.Protocol, cur.Port, a.Address); err != nil { return err } st.asDel++ } // Add new ASes, update weights on existing ones. for addr, a := range d.ASes { c, hit := curASes[addr] if !hit { if err := addAS(ch, d.Prefix, d.Protocol, d.Port, a); err != nil { return err } st.asAdd++ continue } if c.Weight != a.Weight { // Flush only on the transition from serving traffic (cur > 0) to // zero, and only when the desired state explicitly asks for it // (i.e. the backend was disabled, not merely drained). Steady- // state syncs where weight doesn't change never re-flush. flush := a.Flush && c.Weight > 0 && a.Weight == 0 if err := setASWeight(ch, d.Prefix, d.Protocol, d.Port, a, flush); err != nil { return err } st.asWeight++ } } return nil } // removeVIP flushes all ASes from a VIP and then deletes the VIP itself. 
func removeVIP(ch *loggedChannel, v LBVIP, st *syncStats) error { for _, as := range v.ASes { if err := delAS(ch, v.Prefix, v.Protocol, v.Port, as.Address); err != nil { return err } st.asDel++ } if err := delVIP(ch, v.Prefix, v.Protocol, v.Port); err != nil { return err } st.vipDel++ return nil } // desiredFromConfig flattens every frontend in cfg into a desired VIP set. // src provides the per-backend health state so weights and flush hints // reflect the current runtime state, not just the static config. func desiredFromConfig(cfg *config.Config, src StateSource) []desiredVIP { out := make([]desiredVIP, 0, len(cfg.Frontends)) for _, fe := range cfg.Frontends { out = append(out, desiredFromFrontend(cfg, fe, src)) } return out } // desiredFromFrontend builds the desired VIP for a single frontend. // // All backends across all pools of a frontend are merged into a single // application-server list so VPP knows about every backend that could ever // receive traffic. The per-AS weight and flush hint are computed by // asFromBackend from three inputs: (pool index, backend health state, // configured pool weight). // // When the same backend appears in multiple pools, the first pool it // appears in wins. 
func desiredFromFrontend(cfg *config.Config, fe config.Frontend, src StateSource) desiredVIP { bits := 32 if fe.Address.To4() == nil { bits = 128 } d := desiredVIP{ Prefix: &net.IPNet{IP: fe.Address, Mask: net.CIDRMask(bits, bits)}, Protocol: protocolFromConfig(fe.Protocol), Port: fe.Port, SrcIPSticky: fe.SrcIPSticky, ASes: make(map[string]desiredAS), } states := snapshotStates(fe, src) activePool := health.ActivePoolIndex(fe, states) for poolIdx, pool := range fe.Pools { for bName, pb := range pool.Backends { b, ok := cfg.Backends[bName] if !ok || b.Address == nil { continue } // Disabled backends (either via operator action or config) are // kept in the desired set so they stay installed in VPP with // weight=0 — they must not be deleted, otherwise a subsequent // enable has to re-add them and existing flow-table state (if // any) is lost. The state machine drives what weight to set // via health.BackendEffectiveWeight; we never filter on // b.Enabled here. addr := b.Address.String() if _, already := d.ASes[addr]; already { continue } w, flush := health.BackendEffectiveWeight(poolIdx, activePool, states[bName], pb.Weight) d.ASes[addr] = desiredAS{ Address: b.Address, Weight: w, Flush: flush, } } } return d } // EffectiveWeights returns the current effective VPP weight for every backend // in every pool of fe, keyed by poolIdx and backend name. Intended for // observability (e.g. the GetFrontend gRPC handler) — the sync path and the // checker's frontend-state logic use health.EffectiveWeights directly. func EffectiveWeights(fe config.Frontend, src StateSource) map[int]map[string]uint8 { return health.EffectiveWeights(fe, snapshotStates(fe, src)) } // snapshotStates builds a state map for every backend referenced by fe. It // takes one read-lock per backend via the StateSource interface, so the // caller gets a consistent view to feed into the pure health helpers. 
func snapshotStates(fe config.Frontend, src StateSource) map[string]health.State { states := make(map[string]health.State) for _, pool := range fe.Pools { for bName := range pool.Backends { if s, ok := src.BackendState(bName); ok { states[bName] = s } else { states[bName] = health.StateUnknown } } } return states } // ---- API call helpers ------------------------------------------------------ // defaultFlowsTableLength is sent as NewFlowsTableLength in lb_add_del_vip_v2. // The .api file declares default=1024 but that default is only applied by VAT/ // the CLI parser, not when a raw message is marshalled over the socket. If we // send 0, the plugin's vec_validate explodes (OOM / panic). Must be a power of // two — 1024 matches the default that would have been applied via CLI. const defaultFlowsTableLength = 1024 func addVIP(ch *loggedChannel, d desiredVIP) error { encap := encapForIP(d.Prefix.IP) req := &lb.LbAddDelVipV2{ Pfx: ip_types.NewAddressWithPrefix(*d.Prefix), Protocol: d.Protocol, Port: d.Port, Encap: encap, Type: lb_types.LB_API_SRV_TYPE_CLUSTERIP, NewFlowsTableLength: defaultFlowsTableLength, SrcIPSticky: d.SrcIPSticky, IsDel: false, } reply := &lb.LbAddDelVipV2Reply{} if err := ch.SendRequest(req).ReceiveReply(reply); err != nil { return fmt.Errorf("lb_add_del_vip_v2 add %s: %w", d.Prefix, err) } if reply.Retval != 0 { return fmt.Errorf("lb_add_del_vip_v2 add %s: retval=%d", d.Prefix, reply.Retval) } slog.Debug("vpp-lbsync-vip-add", "prefix", d.Prefix.String(), "protocol", protocolName(d.Protocol), "port", d.Port, "encap", encapName(encap), "src-ip-sticky", d.SrcIPSticky) return nil } func delVIP(ch *loggedChannel, prefix *net.IPNet, protocol uint8, port uint16) error { req := &lb.LbAddDelVipV2{ Pfx: ip_types.NewAddressWithPrefix(*prefix), Protocol: protocol, Port: port, IsDel: true, } reply := &lb.LbAddDelVipV2Reply{} if err := ch.SendRequest(req).ReceiveReply(reply); err != nil { return fmt.Errorf("lb_add_del_vip_v2 del %s: %w", prefix, err) } if 
reply.Retval != 0 { return fmt.Errorf("lb_add_del_vip_v2 del %s: retval=%d", prefix, reply.Retval) } slog.Debug("vpp-lbsync-vip-del", "prefix", prefix.String(), "protocol", protocolName(protocol), "port", port) return nil } func addAS(ch *loggedChannel, prefix *net.IPNet, protocol uint8, port uint16, a desiredAS) error { req := &lb.LbAddDelAsV2{ Pfx: ip_types.NewAddressWithPrefix(*prefix), Protocol: protocol, Port: port, AsAddress: ip_types.NewAddress(a.Address), Weight: a.Weight, IsDel: false, } reply := &lb.LbAddDelAsV2Reply{} if err := ch.SendRequest(req).ReceiveReply(reply); err != nil { return fmt.Errorf("lb_add_del_as_v2 add %s@%s: %w", a.Address, prefix, err) } if reply.Retval != 0 { return fmt.Errorf("lb_add_del_as_v2 add %s@%s: retval=%d", a.Address, prefix, reply.Retval) } slog.Debug("vpp-lbsync-as-add", "vip", prefix.String(), "protocol", protocolName(protocol), "port", port, "address", a.Address.String(), "weight", a.Weight) return nil } func delAS(ch *loggedChannel, prefix *net.IPNet, protocol uint8, port uint16, addr net.IP) error { req := &lb.LbAddDelAsV2{ Pfx: ip_types.NewAddressWithPrefix(*prefix), Protocol: protocol, Port: port, AsAddress: ip_types.NewAddress(addr), IsDel: true, IsFlush: true, } reply := &lb.LbAddDelAsV2Reply{} if err := ch.SendRequest(req).ReceiveReply(reply); err != nil { return fmt.Errorf("lb_add_del_as_v2 del %s@%s: %w", addr, prefix, err) } if reply.Retval != 0 { return fmt.Errorf("lb_add_del_as_v2 del %s@%s: retval=%d", addr, prefix, reply.Retval) } slog.Debug("vpp-lbsync-as-del", "vip", prefix.String(), "protocol", protocolName(protocol), "port", port, "address", addr.String()) return nil } func setASWeight(ch *loggedChannel, prefix *net.IPNet, protocol uint8, port uint16, a desiredAS, flush bool) error { req := &lb.LbAsSetWeight{ Pfx: ip_types.NewAddressWithPrefix(*prefix), Protocol: protocol, Port: port, AsAddress: ip_types.NewAddress(a.Address), Weight: a.Weight, IsFlush: flush, } reply := &lb.LbAsSetWeightReply{} if err 
:= ch.SendRequest(req).ReceiveReply(reply); err != nil { return fmt.Errorf("lb_as_set_weight %s@%s: %w", a.Address, prefix, err) } if reply.Retval != 0 { return fmt.Errorf("lb_as_set_weight %s@%s: retval=%d", a.Address, prefix, reply.Retval) } slog.Debug("vpp-lbsync-as-weight", "vip", prefix.String(), "protocol", protocolName(protocol), "port", port, "address", a.Address.String(), "weight", a.Weight, "flush", flush) return nil } // ---- VIP snapshot scrape --------------------------------------------------- // TEMPORARY WORKAROUND: VPP's lb_vip_dump / lb_vip_details message does not // return the src_ip_sticky flag, and lb_vip_details also doesn't expose the // LB plugin's pool index — which is what the stats segment uses to key // per-VIP counters. We work around both by running `show lb vips verbose` // via the cli_inband API and parsing the human-readable output. This is // slow and fragile (the format is not a stable API) and must be replaced // once VPP ships an lb_vip_v2_dump that includes these fields. // lbVIPSnapshot holds the per-VIP facts that the scrape recovers. `index` // is the LB pool index (`vip - lbm->vips` in lb.c) used as the second // dimension of the vip_counters SimpleCounterStat in the stats segment. type lbVIPSnapshot struct { index uint32 sticky bool } // queryLBVIPSnapshot runs `show lb vips verbose` and parses it into a map // keyed by vipKey. The returned error is non-nil only when the cli_inband // RPC itself fails. 
func queryLBVIPSnapshot(ch *loggedChannel) (map[vipKey]lbVIPSnapshot, error) { req := &vlib.CliInband{Cmd: "show lb vips verbose"} reply := &vlib.CliInbandReply{} if err := ch.SendRequest(req).ReceiveReply(reply); err != nil { return nil, fmt.Errorf("cli_inband %q: %w", req.Cmd, err) } if reply.Retval != 0 { return nil, fmt.Errorf("cli_inband %q: retval=%d", req.Cmd, reply.Retval) } return parseLBVIPSnapshot(reply.Reply), nil } // queryLBSticky is a thin projection of queryLBVIPSnapshot for callers that // only care about the src_ip_sticky flag (the sync path). func queryLBSticky(ch *loggedChannel) (map[vipKey]bool, error) { snap, err := queryLBVIPSnapshot(ch) if err != nil { return nil, err } out := make(map[vipKey]bool, len(snap)) for k, v := range snap { out[k] = v.sticky } return out, nil } // lbVIPHeaderRe matches the first line of each VIP block in the output of // `show lb vips verbose`. VPP produces lines like: // // ip4-gre4 [1] 192.0.2.1/32 src_ip_sticky // ip6-gre6 [2] 2001:db8::1/128 // // Capture groups: 1 = pool index, 2 = prefix (CIDR), 3 = " src_ip_sticky" when present. var lbVIPHeaderRe = regexp.MustCompile( `\b(?:ip4-gre4|ip6-gre6|ip4-gre6|ip6-gre4|ip4-l3dsr|ip4-nat4|ip6-nat6)\s+\[(\d+)\]\s+(\S+)(\s+src_ip_sticky)?`, ) // lbVIPProtoRe matches the `protocol: port:` sub-line that appears // under each non-all-port VIP block. var lbVIPProtoRe = regexp.MustCompile(`protocol:(\d+)\s+port:(\d+)`) // parseLBVIPSnapshot turns the reply text of `show lb vips verbose` into a // map of vipKey → lbVIPSnapshot. It walks lines sequentially: each header // line starts a new VIP, and the following `protocol:

port:` line // (if any) refines the key. All-port VIPs have no protocol/port sub-line // and are recorded with protocol=255 and port=0 — the same key format used // by the rest of the sync path (see protocolFromConfig). func parseLBVIPSnapshot(text string) map[vipKey]lbVIPSnapshot { out := make(map[vipKey]lbVIPSnapshot) var havePending bool var curPrefix string var curIndex uint32 var curSticky bool var curProto uint8 = 255 var curPort uint16 flush := func() { if !havePending || curPrefix == "" { return } out[vipKey{prefix: curPrefix, protocol: curProto, port: curPort}] = lbVIPSnapshot{ index: curIndex, sticky: curSticky, } } for _, line := range strings.Split(text, "\n") { if m := lbVIPHeaderRe.FindStringSubmatch(line); m != nil { flush() havePending = true if idx, err := strconv.ParseUint(m[1], 10, 32); err == nil { curIndex = uint32(idx) } else { curIndex = 0 } curPrefix = canonicalCIDR(m[2]) curSticky = m[3] != "" curProto = 255 curPort = 0 continue } if !havePending { continue } if m := lbVIPProtoRe.FindStringSubmatch(line); m != nil { if p, err := strconv.Atoi(m[1]); err == nil { curProto = uint8(p) } if p, err := strconv.Atoi(m[2]); err == nil { curPort = uint16(p) } } } flush() return out } // canonicalCIDR parses a CIDR string and returns it in Go's canonical // net.IPNet.String() form so it matches the keys produced by makeVIPKey. // Returns the input unchanged if parsing fails — the header regex should // have validated it, but a mismatch shouldn't panic the sync path. 
func canonicalCIDR(s string) string { _, ipnet, err := net.ParseCIDR(s) if err != nil { return s } return ipnet.String() } // ---- utility --------------------------------------------------------------- func makeVIPKey(prefix *net.IPNet, protocol uint8, port uint16) vipKey { return vipKey{prefix: prefix.String(), protocol: protocol, port: port} } func protocolFromConfig(s string) uint8 { switch s { case "tcp": return 6 case "udp": return 17 } return 255 // any } func protocolName(p uint8) string { switch p { case 6: return "tcp" case 17: return "udp" case 255: return "any" } return fmt.Sprintf("%d", p) } func encapForIP(ip net.IP) lb_types.LbEncapType { if ip.To4() != nil { return lb_types.LB_API_ENCAP_TYPE_GRE4 } return lb_types.LB_API_ENCAP_TYPE_GRE6 } func encapName(e lb_types.LbEncapType) string { switch e { case lb_types.LB_API_ENCAP_TYPE_GRE4: return "gre4" case lb_types.LB_API_ENCAP_TYPE_GRE6: return "gre6" } return fmt.Sprintf("%d", e) }