// Copyright (c) 2026, Pim van Pelt

// Package vpp (this file): reconciliation of VPP load-balancer (maglev)
// state against the daemon's config and runtime backend-health state.
package vpp

import (
	"errors"
	"fmt"
	"log/slog"
	"net"

	"git.ipng.ch/ipng/vpp-maglev/internal/config"
	"git.ipng.ch/ipng/vpp-maglev/internal/health"
	ip_types "git.ipng.ch/ipng/vpp-maglev/internal/vpp/binapi/ip_types"
	lb "git.ipng.ch/ipng/vpp-maglev/internal/vpp/binapi/lb"
	lb_types "git.ipng.ch/ipng/vpp-maglev/internal/vpp/binapi/lb_types"
)

// ErrFrontendNotFound is returned by SyncLBStateVIP when the caller asks for
// a frontend name that does not exist in the config. Compare with errors.Is.
var ErrFrontendNotFound = errors.New("frontend not found in config")

// vipKey uniquely identifies a VPP LB VIP by its prefix, protocol, and port.
// It is used as a map key, so all fields are comparable value types.
type vipKey struct {
	prefix   string // canonical CIDR form (net.IPNet.String())
	protocol uint8  // IP protocol number; see protocolFromConfig
	port     uint16
}

// desiredVIP is the sync's view of one VIP derived from the maglev config.
type desiredVIP struct {
	Prefix   *net.IPNet
	Protocol uint8 // 6=TCP, 17=UDP, 255=any
	Port     uint16
	ASes     map[string]desiredAS // keyed by AS IP string
}

// desiredAS is one application server to be installed under a VIP.
type desiredAS struct {
	Address net.IP
	Weight  uint8 // 0-100
	Flush   bool  // if true, drop existing flows when transitioning to weight 0
}

// syncStats counts changes made to the dataplane during a sync run; it is
// only used for the summary log lines emitted by the sync entry points.
type syncStats struct {
	vipAdd   int
	vipDel   int
	asAdd    int
	asDel    int
	asWeight int
}

// SyncLBStateAll reconciles the full VPP load-balancer state with the given
// config. For every frontend in cfg:
//   - if the VIP does not exist in VPP, create it;
//   - for every pool backend, add the application server if missing, or
//     update its weight if different.
//
// VIPs and ASes present in VPP but absent from the config are removed.
// Returns an error if any VPP API call fails.
func (c *Client) SyncLBStateAll(cfg *config.Config) error { if !c.IsConnected() { return errNotConnected } src := c.getStateSource() if src == nil { return fmt.Errorf("no state source configured") } cur, err := c.GetLBStateAll() if err != nil { return fmt.Errorf("read VPP LB state: %w", err) } desired := desiredFromConfig(cfg, src) ch, err := c.apiChannel() if err != nil { return err } defer ch.Close() slog.Info("vpp-lbsync-start", "scope", "all", "vips-desired", len(desired), "vips-current", len(cur.VIPs)) // Index both sides by (prefix, protocol, port). curByKey := make(map[vipKey]LBVIP, len(cur.VIPs)) for _, v := range cur.VIPs { curByKey[makeVIPKey(v.Prefix, v.Protocol, v.Port)] = v } desByKey := make(map[vipKey]desiredVIP, len(desired)) for _, d := range desired { desByKey[makeVIPKey(d.Prefix, d.Protocol, d.Port)] = d } var st syncStats // ---- pass 1: remove VIPs that are in VPP but not in config ---- for k, v := range curByKey { if _, keep := desByKey[k]; keep { continue } if err := removeVIP(ch, v, &st); err != nil { return err } } // ---- pass 2: add/update VIPs that are in config ---- for k, d := range desByKey { cur, existing := curByKey[k] var curPtr *LBVIP if existing { curPtr = &cur } if err := reconcileVIP(ch, d, curPtr, &st); err != nil { return err } } slog.Info("vpp-lbsync-done", "scope", "all", "vip-added", st.vipAdd, "vip-removed", st.vipDel, "as-added", st.asAdd, "as-removed", st.asDel, "as-weight-updated", st.asWeight) return nil } // SyncLBStateVIP reconciles a single VIP (identified by frontend name) with // the given config. Unlike SyncLBStateAll, it never removes VIPs: if the // frontend is missing from cfg, SyncLBStateVIP returns ErrFrontendNotFound. // This is the right tool for targeted updates on a busy load-balancer with // many VIPs — only one VIP is read from VPP and only its ASes are modified. 
func (c *Client) SyncLBStateVIP(cfg *config.Config, feName string) error { if !c.IsConnected() { return errNotConnected } src := c.getStateSource() if src == nil { return fmt.Errorf("no state source configured") } fe, ok := cfg.Frontends[feName] if !ok { return fmt.Errorf("%q: %w", feName, ErrFrontendNotFound) } d := desiredFromFrontend(cfg, fe, src) cur, err := c.GetLBStateVIP(d.Prefix, d.Protocol, d.Port) if err != nil { return fmt.Errorf("read VPP VIP state: %w", err) } ch, err := c.apiChannel() if err != nil { return err } defer ch.Close() slog.Info("vpp-lbsync-start", "scope", "vip", "frontend", feName, "prefix", d.Prefix.String(), "protocol", protocolName(d.Protocol), "port", d.Port) var st syncStats if err := reconcileVIP(ch, d, cur, &st); err != nil { return err } slog.Info("vpp-lbsync-done", "scope", "vip", "frontend", feName, "vip-added", st.vipAdd, "as-added", st.asAdd, "as-removed", st.asDel, "as-weight-updated", st.asWeight) return nil } // reconcileVIP brings one VIP's state in VPP into alignment with the desired // state. If cur is nil the VIP is added from scratch; otherwise ASes are // added, removed, and reweighted individually. Stats are accumulated into st. func reconcileVIP(ch *loggedChannel, d desiredVIP, cur *LBVIP, st *syncStats) error { if cur == nil { if err := addVIP(ch, d); err != nil { return err } st.vipAdd++ for _, as := range d.ASes { if err := addAS(ch, d.Prefix, d.Protocol, d.Port, as); err != nil { return err } st.asAdd++ } return nil } // VIP exists in both — reconcile ASes. curASes := make(map[string]LBAS, len(cur.ASes)) for _, a := range cur.ASes { curASes[a.Address.String()] = a } // Remove ASes that are in VPP but not desired. for addr, a := range curASes { if _, keep := d.ASes[addr]; keep { continue } if err := delAS(ch, cur.Prefix, cur.Protocol, cur.Port, a.Address); err != nil { return err } st.asDel++ } // Add new ASes, update weights on existing ones. 
for addr, a := range d.ASes { c, hit := curASes[addr] if !hit { if err := addAS(ch, d.Prefix, d.Protocol, d.Port, a); err != nil { return err } st.asAdd++ continue } if c.Weight != a.Weight { // Flush only on the transition from serving traffic (cur > 0) to // zero, and only when the desired state explicitly asks for it // (i.e. the backend was disabled, not merely drained). Steady- // state syncs where weight doesn't change never re-flush. flush := a.Flush && c.Weight > 0 && a.Weight == 0 if err := setASWeight(ch, d.Prefix, d.Protocol, d.Port, a, flush); err != nil { return err } st.asWeight++ } } return nil } // removeVIP flushes all ASes from a VIP and then deletes the VIP itself. func removeVIP(ch *loggedChannel, v LBVIP, st *syncStats) error { for _, as := range v.ASes { if err := delAS(ch, v.Prefix, v.Protocol, v.Port, as.Address); err != nil { return err } st.asDel++ } if err := delVIP(ch, v.Prefix, v.Protocol, v.Port); err != nil { return err } st.vipDel++ return nil } // desiredFromConfig flattens every frontend in cfg into a desired VIP set. // src provides the per-backend health state so weights and flush hints // reflect the current runtime state, not just the static config. func desiredFromConfig(cfg *config.Config, src StateSource) []desiredVIP { out := make([]desiredVIP, 0, len(cfg.Frontends)) for _, fe := range cfg.Frontends { out = append(out, desiredFromFrontend(cfg, fe, src)) } return out } // desiredFromFrontend builds the desired VIP for a single frontend. // // All backends across all pools of a frontend are merged into a single // application-server list so VPP knows about every backend that could ever // receive traffic. The per-AS weight and flush hint are computed by // asFromBackend from three inputs: (pool index, backend health state, // configured pool weight). // // When the same backend appears in multiple pools, the first pool it // appears in wins. 
func desiredFromFrontend(cfg *config.Config, fe config.Frontend, src StateSource) desiredVIP { bits := 32 if fe.Address.To4() == nil { bits = 128 } d := desiredVIP{ Prefix: &net.IPNet{IP: fe.Address, Mask: net.CIDRMask(bits, bits)}, Protocol: protocolFromConfig(fe.Protocol), Port: fe.Port, ASes: make(map[string]desiredAS), } // Snapshot backend states once so the active-pool computation and the // per-backend weight assignment see a consistent view. states := make(map[string]health.State) for _, pool := range fe.Pools { for bName := range pool.Backends { if s, ok := src.BackendState(bName); ok { states[bName] = s } else { states[bName] = health.StateUnknown } } } activePool := activePoolIndex(fe, states) for poolIdx, pool := range fe.Pools { for bName, pb := range pool.Backends { b, ok := cfg.Backends[bName] if !ok || b.Address == nil { continue } // Disabled backends (either via operator action or config) are // kept in the desired set so they stay installed in VPP with // weight=0 — they must not be deleted, otherwise a subsequent // enable has to re-add them and existing flow-table state (if // any) is lost. The state machine drives what weight to set // via asFromBackend; we never filter on b.Enabled here. addr := b.Address.String() if _, already := d.ASes[addr]; already { continue } w, flush := asFromBackend(poolIdx, activePool, states[bName], pb.Weight) d.ASes[addr] = desiredAS{ Address: b.Address, Weight: w, Flush: flush, } } } return d } // EffectiveWeights returns the current effective VPP weight for every backend // in every pool of fe, keyed by poolIdx and backend name. It runs the same // failover + state-aware weight calculation that the sync path uses, but // produces a plain map instead of desiredVIP — intended for observability // (e.g. the GetFrontend gRPC handler) and for robot-testing the failover // logic without needing a running VPP instance. // // The returned map layout is: result[poolIdx][backendName] = effective weight. 
func EffectiveWeights(fe config.Frontend, src StateSource) map[int]map[string]uint8 { states := make(map[string]health.State) for _, pool := range fe.Pools { for bName := range pool.Backends { if s, ok := src.BackendState(bName); ok { states[bName] = s } else { states[bName] = health.StateUnknown } } } activePool := activePoolIndex(fe, states) out := make(map[int]map[string]uint8, len(fe.Pools)) for poolIdx, pool := range fe.Pools { out[poolIdx] = make(map[string]uint8, len(pool.Backends)) for bName, pb := range pool.Backends { w, _ := asFromBackend(poolIdx, activePool, states[bName], pb.Weight) out[poolIdx][bName] = w } } return out } // activePoolIndex returns the index of the first pool in fe that contains at // least one backend currently in StateUp. This is the priority-failover // selector: pool[0] is the primary, pool[1] is the first fallback, and so on. // As long as pool[0] has any up backend, it stays active. When every pool[0] // backend leaves StateUp (down, paused, disabled, unknown), pool[1] is // promoted — and so on for further fallback tiers. When no pool has any up // backend, returns 0 (the return value is unobservable in that case since // every backend maps to weight 0 regardless of the active pool). func activePoolIndex(fe config.Frontend, states map[string]health.State) int { for i, pool := range fe.Pools { for bName := range pool.Backends { if states[bName] == health.StateUp { return i } } } return 0 } // asFromBackend is the pure mapping from (pool index, active pool, backend // state, config weight) to the desired VPP AS weight and flush hint. This is // the single source of truth for the state → dataplane rule — every LB change // flows through this function. // // A backend gets its configured weight iff it is up AND belongs to the // currently-active pool. Every other case yields weight 0. The only // state that produces flush=true is disabled. 
// // state in active pool not in active pool flush // -------- -------------- ------------------- ----- // unknown 0 0 no // up configured 0 (standby) no // down 0 0 no // paused 0 0 no // disabled 0 0 yes // removed handled separately (AS deleted via delAS) // // Flush semantics: flush=true means "if the AS currently has a non-zero // weight in VPP, drop its existing flow-table entries when setting weight // to 0". The reconciler only acts on flush when transitioning (current // weight > 0), so steady-state syncs never re-flush. Failover demotion // (e.g. pool[1] up→standby when pool[0] recovers) does NOT flush — we // let those sessions drain naturally. func asFromBackend(poolIdx, activePool int, state health.State, cfgWeight int) (weight uint8, flush bool) { switch state { case health.StateUp: if poolIdx == activePool { return clampWeight(cfgWeight), false } return 0, false case health.StateDisabled: return 0, true default: // unknown, down, paused: off, drain existing flows naturally. return 0, false } } // ---- API call helpers ------------------------------------------------------ // defaultFlowsTableLength is sent as NewFlowsTableLength in lb_add_del_vip_v2. // The .api file declares default=1024 but that default is only applied by VAT/ // the CLI parser, not when a raw message is marshalled over the socket. If we // send 0, the plugin's vec_validate explodes (OOM / panic). Must be a power of // two — 1024 matches the default that would have been applied via CLI. 
const defaultFlowsTableLength = 1024 func addVIP(ch *loggedChannel, d desiredVIP) error { encap := encapForIP(d.Prefix.IP) req := &lb.LbAddDelVipV2{ Pfx: ip_types.NewAddressWithPrefix(*d.Prefix), Protocol: d.Protocol, Port: d.Port, Encap: encap, Type: lb_types.LB_API_SRV_TYPE_CLUSTERIP, NewFlowsTableLength: defaultFlowsTableLength, IsDel: false, } reply := &lb.LbAddDelVipV2Reply{} if err := ch.SendRequest(req).ReceiveReply(reply); err != nil { return fmt.Errorf("lb_add_del_vip_v2 add %s: %w", d.Prefix, err) } if reply.Retval != 0 { return fmt.Errorf("lb_add_del_vip_v2 add %s: retval=%d", d.Prefix, reply.Retval) } slog.Debug("vpp-lbsync-vip-add", "prefix", d.Prefix.String(), "protocol", protocolName(d.Protocol), "port", d.Port, "encap", encapName(encap)) return nil } func delVIP(ch *loggedChannel, prefix *net.IPNet, protocol uint8, port uint16) error { req := &lb.LbAddDelVipV2{ Pfx: ip_types.NewAddressWithPrefix(*prefix), Protocol: protocol, Port: port, IsDel: true, } reply := &lb.LbAddDelVipV2Reply{} if err := ch.SendRequest(req).ReceiveReply(reply); err != nil { return fmt.Errorf("lb_add_del_vip_v2 del %s: %w", prefix, err) } if reply.Retval != 0 { return fmt.Errorf("lb_add_del_vip_v2 del %s: retval=%d", prefix, reply.Retval) } slog.Debug("vpp-lbsync-vip-del", "prefix", prefix.String(), "protocol", protocolName(protocol), "port", port) return nil } func addAS(ch *loggedChannel, prefix *net.IPNet, protocol uint8, port uint16, a desiredAS) error { req := &lb.LbAddDelAsV2{ Pfx: ip_types.NewAddressWithPrefix(*prefix), Protocol: protocol, Port: port, AsAddress: ip_types.NewAddress(a.Address), Weight: a.Weight, IsDel: false, } reply := &lb.LbAddDelAsV2Reply{} if err := ch.SendRequest(req).ReceiveReply(reply); err != nil { return fmt.Errorf("lb_add_del_as_v2 add %s@%s: %w", a.Address, prefix, err) } if reply.Retval != 0 { return fmt.Errorf("lb_add_del_as_v2 add %s@%s: retval=%d", a.Address, prefix, reply.Retval) } slog.Debug("vpp-lbsync-as-add", "vip", prefix.String(), 
"protocol", protocolName(protocol), "port", port, "address", a.Address.String(), "weight", a.Weight) return nil } func delAS(ch *loggedChannel, prefix *net.IPNet, protocol uint8, port uint16, addr net.IP) error { req := &lb.LbAddDelAsV2{ Pfx: ip_types.NewAddressWithPrefix(*prefix), Protocol: protocol, Port: port, AsAddress: ip_types.NewAddress(addr), IsDel: true, IsFlush: true, } reply := &lb.LbAddDelAsV2Reply{} if err := ch.SendRequest(req).ReceiveReply(reply); err != nil { return fmt.Errorf("lb_add_del_as_v2 del %s@%s: %w", addr, prefix, err) } if reply.Retval != 0 { return fmt.Errorf("lb_add_del_as_v2 del %s@%s: retval=%d", addr, prefix, reply.Retval) } slog.Debug("vpp-lbsync-as-del", "vip", prefix.String(), "protocol", protocolName(protocol), "port", port, "address", addr.String()) return nil } func setASWeight(ch *loggedChannel, prefix *net.IPNet, protocol uint8, port uint16, a desiredAS, flush bool) error { req := &lb.LbAsSetWeight{ Pfx: ip_types.NewAddressWithPrefix(*prefix), Protocol: protocol, Port: port, AsAddress: ip_types.NewAddress(a.Address), Weight: a.Weight, IsFlush: flush, } reply := &lb.LbAsSetWeightReply{} if err := ch.SendRequest(req).ReceiveReply(reply); err != nil { return fmt.Errorf("lb_as_set_weight %s@%s: %w", a.Address, prefix, err) } if reply.Retval != 0 { return fmt.Errorf("lb_as_set_weight %s@%s: retval=%d", a.Address, prefix, reply.Retval) } slog.Debug("vpp-lbsync-as-weight", "vip", prefix.String(), "protocol", protocolName(protocol), "port", port, "address", a.Address.String(), "weight", a.Weight, "flush", flush) return nil } // ---- utility --------------------------------------------------------------- func makeVIPKey(prefix *net.IPNet, protocol uint8, port uint16) vipKey { return vipKey{prefix: prefix.String(), protocol: protocol, port: port} } func protocolFromConfig(s string) uint8 { switch s { case "tcp": return 6 case "udp": return 17 } return 255 // any } func protocolName(p uint8) string { switch p { case 6: return "tcp" case 
17: return "udp" case 255: return "any" } return fmt.Sprintf("%d", p) } func encapForIP(ip net.IP) lb_types.LbEncapType { if ip.To4() != nil { return lb_types.LB_API_ENCAP_TYPE_GRE4 } return lb_types.LB_API_ENCAP_TYPE_GRE6 } func encapName(e lb_types.LbEncapType) string { switch e { case lb_types.LB_API_ENCAP_TYPE_GRE4: return "gre4" case lb_types.LB_API_ENCAP_TYPE_GRE6: return "gre6" } return fmt.Sprintf("%d", e) } func clampWeight(w int) uint8 { if w < 0 { return 0 } if w > 100 { return 100 } return uint8(w) }