// Copyright (c) 2026, Pim van Pelt

// Package config parses and validates the maglev YAML configuration file.
package config

import (
	"fmt"
	"net"
	"os"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	"gopkg.in/yaml.v3"
)

// Config is the top-level parsed and validated configuration.
type Config struct {
	HealthChecker HealthCheckerConfig
	VPP           VPPConfig
	HealthChecks  map[string]HealthCheck // keyed by healthcheck name
	Backends      map[string]Backend     // keyed by backend name
	Frontends     map[string]Frontend    // keyed by frontend name
}

// HealthCheckerConfig holds global health checker settings.
type HealthCheckerConfig struct {
	TransitionHistory int    // defaults to 5 when omitted or 0; must be >= 1
	Netns             string // network namespace for probes; "" = current netns
}

// VPPConfig holds VPP-related configuration.
type VPPConfig struct {
	LB VPPLBConfig
}

// VPPLBConfig holds load-balancer integration settings.
type VPPLBConfig struct {
	// SyncInterval is how often the full dataplane reconciliation runs,
	// catching drift (e.g. manual changes to VPP). Defaults to 30s and
	// must be greater than zero.
	SyncInterval time.Duration

	// IPv4SrcAddress is the source address VPP uses when encapsulating
	// IPv4 traffic into GRE4 tunnels to application servers. VPP GRE
	// encap will fail if unset, so config loading rejects a configuration
	// that omits it. Stored in 4-byte form.
	IPv4SrcAddress net.IP

	// IPv6SrcAddress is the source address VPP uses when encapsulating
	// IPv6 traffic into GRE6 tunnels. VPP GRE encap will fail if unset,
	// so config loading rejects a configuration that omits it. Stored in
	// 16-byte form.
	IPv6SrcAddress net.IP

	// StickyBucketsPerCore is the number of buckets (per worker thread) in
	// the established-flow table. Must be a power of 2. Defaults to 65536.
	StickyBucketsPerCore uint32

	// FlowTimeout is the idle time after which an established flow is
	// removed from the table. Must be a whole number of seconds between
	// 1 and 120 inclusive. Defaults to 40s.
	FlowTimeout time.Duration
}

// HealthCheck describes how to probe a backend.
type HealthCheck struct {
	Type         string        // "icmp", "tcp", "http", or "https"
	Port         uint16        // destination port; required for tcp/http/https, must be 0 for icmp
	HTTP         *HTTPParams   // non-nil for type http and https
	TCP          *TCPParams    // non-nil for type tcp
	ProbeIPv4Src net.IP        // source address for IPv4 probes; nil = OS picks
	ProbeIPv6Src net.IP        // source address for IPv6 probes; nil = OS picks
	Interval     time.Duration // required; must be a positive duration
	FastInterval time.Duration // optional; used while health counter is degraded
	DownInterval time.Duration // optional; used while fully down
	Timeout      time.Duration // required; must be a positive duration
	Rise         int           // default 2
	Fall         int           // default 3
}

// HTTPParams holds validated parameters for http/https health checks.
type HTTPParams struct {
	Path               string         // request path; required
	Host               string         // Host header; defaults to backend IP if empty
	ResponseCodeMin    int            // inclusive lower bound; default 200
	ResponseCodeMax    int            // inclusive upper bound; default 200
	ResponseRegexp     *regexp.Regexp // nil if not configured
	ServerName         string         // TLS SNI; falls back to Host if empty (https only)
	InsecureSkipVerify bool           // skip TLS certificate verification (https only)
}

// TCPParams holds validated parameters for tcp health checks. The fields
// are copied verbatim from the healthcheck's params section.
type TCPParams struct {
	SSL                bool   // from params.ssl; presumably wraps the probe in TLS — confirm against the prober
	ServerName         string // from params.server-name; presumably the TLS SNI when SSL is set — confirm
	InsecureSkipVerify bool   // from params.insecure-skip-verify; presumably skips TLS verification — confirm
}

// Backend is a single named backend server.
type Backend struct {
	Address     net.IP // backend address; IPv4 or IPv6
	HealthCheck string // name reference into Config.HealthChecks; "" = no probing, assume healthy
	Enabled     bool   // default true; false = exclude from serving entirely
}

// PoolBackend is a backend reference within a pool, with pool-local weight.
type PoolBackend struct {
	Weight int // 0-100, default 100
}

// Pool is an ordered tier of backends within a frontend.
type Pool struct {
	Name     string                 // must not be empty
	Backends map[string]PoolBackend // keyed by backend name
}

// Frontend is a single virtual IP entry.
type Frontend struct {
	Description string
	Address     net.IP // the VIP address
	Protocol    string // "tcp", "udp", or "" (all traffic)
	Port        uint16 // 0 means omitted (all ports)
	Pools       []Pool // ordered tiers; first pool with any up backend is active
	SrcIPSticky bool   // when true, VPP LB uses src-IP-based hashing for this VIP

	// FlushOnDown: when true (default), a backend transition to
	// StateDown causes maglevd to set is_flush=true on the VPP
	// weight update so existing flows pinned to the dead AS are
	// torn down immediately. With it false, down transitions only
	// drain (weight=0, keep flows), matching the pre-flag
	// behaviour. rise/fall debouncing already protects against
	// single-probe flaps, so defaulting to flush=true is safe for
	// the common case of a real outage.
	FlushOnDown bool
}

// ---- raw YAML types --------------------------------------------------------

// rawConfig mirrors the YAML document root; everything lives under the
// top-level "maglev" key.
type rawConfig struct {
	Maglev rawMaglev `yaml:"maglev"`
}

// rawMaglev is the unvalidated content of the "maglev" section. convert
// turns it into a Config.
type rawMaglev struct {
	HealthChecker rawHealthCheckerCfg       `yaml:"healthchecker"`
	VPP           rawVPPCfg                 `yaml:"vpp"`
	HealthChecks  map[string]rawHealthCheck `yaml:"healthchecks"`
	Backends      map[string]rawBackend     `yaml:"backends"`
	Frontends     map[string]rawFrontend    `yaml:"frontends"`
}

// rawHealthCheckerCfg is the unvalidated "healthchecker" section.
type rawHealthCheckerCfg struct {
	TransitionHistory int    `yaml:"transition-history"` // 0 (or omitted) → default 5
	Netns             string `yaml:"netns"`
}

// rawVPPCfg is the unvalidated "vpp" section.
type rawVPPCfg struct {
	LB rawVPPLBCfg `yaml:"lb"`
}

// rawVPPLBCfg is the unvalidated "vpp.lb" section. Durations and addresses
// are kept as strings here and parsed by convertVPP.
type rawVPPLBCfg struct {
	SyncInterval         string  `yaml:"sync-interval"` // Go duration; default 30s
	IPv4SrcAddress       string  `yaml:"ipv4-src-address"`
	IPv6SrcAddress       string  `yaml:"ipv6-src-address"`
	StickyBucketsPerCore *uint32 `yaml:"sticky-buckets-per-core"` // default 65536
	FlowTimeout          string  `yaml:"flow-timeout"`            // Go duration; default 40s, [1-120]s
}

// rawHealthCheck is one unvalidated entry of the "healthchecks" map.
// Durations and addresses are kept as strings and parsed by
// convertHealthCheck.
type rawHealthCheck struct {
	Type         string    `yaml:"type"`
	Port         uint16    `yaml:"port"`
	Params       rawParams `yaml:"params"`
	ProbeIPv4Src string    `yaml:"probe-ipv4-src"`
	ProbeIPv6Src string    `yaml:"probe-ipv6-src"`
	Interval     string    `yaml:"interval"`
	FastInterval string    `yaml:"fast-interval"`
	DownInterval string    `yaml:"down-interval"`
	Timeout      string    `yaml:"timeout"`
	Rise         int       `yaml:"rise"` // 0 (or omitted) → default 2
	Fall         int       `yaml:"fall"` // 0 (or omitted) → default 3
}

// rawParams is the type-specific "params" section of a healthcheck. One
// struct serves every healthcheck type; convertHealthCheck picks the fields
// relevant to the configured type.
type rawParams struct {
	// HTTP / HTTPS
	Path               string `yaml:"path"`
	Host               string `yaml:"host"`
	ResponseCode       string `yaml:"response-code"` // single code ("200") or range ("200-299")
	ResponseRegexp     string `yaml:"response-regexp"`
	ServerName         string `yaml:"server-name"`          // also copied into TCPParams for type tcp
	InsecureSkipVerify bool   `yaml:"insecure-skip-verify"` // also copied into TCPParams for type tcp

	// TCP
	SSL bool `yaml:"ssl"`
}

// rawBackend is one unvalidated entry of the "backends" map.
type rawBackend struct {
	Address     string `yaml:"address"`
	HealthCheck string `yaml:"healthcheck"`
	Enabled     *bool  `yaml:"enabled"` // nil → default true
}

// rawPoolBackend is one unvalidated backend reference inside a pool.
type rawPoolBackend struct {
	Weight *int `yaml:"weight"` // nil → default 100
}

// rawPool is one unvalidated pool of a frontend.
type rawPool struct {
	Name     string                    `yaml:"name"`
	Backends map[string]rawPoolBackend `yaml:"backends"` // keyed by backend name
}

// rawFrontend is one unvalidated entry of the "frontends" map.
type rawFrontend struct {
	Description string    `yaml:"description"`
	Address     string    `yaml:"address"`
	Protocol    string    `yaml:"protocol"`
	Port        uint16    `yaml:"port"`
	Pools       []rawPool `yaml:"pools"`
	SrcIPSticky bool      `yaml:"src-ip-sticky"`
	FlushOnDown *bool     `yaml:"flush-on-down"` // nil → default true
}

// ---- Check / Load ----------------------------------------------------------

// CheckResult holds the outcome of a config file validation. Exactly one of
// ParseError and SemanticError is non-empty when the config is invalid; both
// are empty on success.
type CheckResult struct {
	ParseError    string // YAML could not be read or parsed
	SemanticError string // YAML parsed but semantic validation failed
}

// OK reports whether the config is valid.
func (r CheckResult) OK() bool {
	return r.ParseError == "" && r.SemanticError == ""
}

// Check reads and validates the config file at path, returning the parsed
// Config (nil on failure) and a CheckResult that distinguishes YAML parse
// errors from semantic validation errors.
func Check(path string) (*Config, CheckResult) { data, err := os.ReadFile(path) if err != nil { return nil, CheckResult{ParseError: fmt.Sprintf("read %q: %v", path, err)} } var raw rawConfig if err := yaml.Unmarshal(data, &raw); err != nil { return nil, CheckResult{ParseError: fmt.Sprintf("parse yaml: %v", err)} } cfg, err := convert(&raw.Maglev) if err != nil { return nil, CheckResult{SemanticError: err.Error()} } return cfg, CheckResult{} } // Load reads and validates the config file at path. func Load(path string) (*Config, error) { cfg, result := Check(path) if !result.OK() { if result.ParseError != "" { return nil, fmt.Errorf("%s", result.ParseError) } return nil, fmt.Errorf("%s", result.SemanticError) } return cfg, nil } // parse unmarshals raw YAML bytes and converts them into a validated Config. // Used by tests; production code goes through Check or Load. func parse(data []byte) (*Config, error) { var raw rawConfig if err := yaml.Unmarshal(data, &raw); err != nil { return nil, fmt.Errorf("parse yaml: %v", err) } return convert(&raw.Maglev) } func convert(r *rawMaglev) (*Config, error) { cfg := &Config{} // ---- healthchecker -------------------------------------------------------- cfg.HealthChecker.Netns = r.HealthChecker.Netns cfg.HealthChecker.TransitionHistory = r.HealthChecker.TransitionHistory if cfg.HealthChecker.TransitionHistory == 0 { cfg.HealthChecker.TransitionHistory = 5 } if cfg.HealthChecker.TransitionHistory < 1 { return nil, fmt.Errorf("healthchecker.transition-history must be >= 1") } // ---- healthchecks --------------------------------------------------------- cfg.HealthChecks = make(map[string]HealthCheck, len(r.HealthChecks)) for name, rh := range r.HealthChecks { hc, err := convertHealthCheck(&rh) if err != nil { return nil, fmt.Errorf("healthcheck %q: %w", name, err) } cfg.HealthChecks[name] = hc } // ---- backends ------------------------------------------------------------- cfg.Backends = make(map[string]Backend, len(r.Backends)) 
for name, rb := range r.Backends { b, err := convertBackend(name, &rb, cfg.HealthChecks) if err != nil { return nil, fmt.Errorf("backend %q: %w", name, err) } cfg.Backends[name] = b } // ---- frontends ------------------------------------------------------------ cfg.Frontends = make(map[string]Frontend, len(r.Frontends)) for name, rf := range r.Frontends { fe, err := convertFrontend(name, &rf, cfg.Backends) if err != nil { return nil, fmt.Errorf("frontend %q: %w", name, err) } cfg.Frontends[name] = fe } // ---- cross-frontend: VIP-address family consistency ----------------------- // // VPP's LB plugin requires every VIP sharing a given IP prefix to use // the same encap type (GRE4 vs GRE6) — even when the VIPs sit on // different ports. The encap is determined by the backend address // family (see internal/vpp/lbsync.go desiredFromFrontend). So two // frontends on the same VIP address with backends in different // families (one IPv4 pool, one IPv6 pool) cannot both be programmed // into VPP: the second one fails at lb_add_del_vip_v2 time with // VNET_API_ERROR_INVALID_ARGUMENT (-73). Catching it here turns the // silent runtime failure into a clear config-load error. if err := validateVIPFamilyConsistency(cfg); err != nil { return nil, err } // ---- vpp ------------------------------------------------------------------ // Runs last so structural errors in healthchecks/backends/frontends are // reported first; operators fix those, then we tell them about the VPP // src-address requirements. if err := convertVPP(&r.VPP, &cfg.VPP); err != nil { return nil, err } return cfg, nil } // convertVPP parses and validates the maglev.vpp section. Missing src-address // fields are tolerated but logged at ERROR level so operators notice that VPP // GRE encap will fail without them. func convertVPP(r *rawVPPCfg, cfg *VPPConfig) error { // sync-interval: default 30s, must be > 0. 
if s := r.LB.SyncInterval; s != "" { d, err := time.ParseDuration(s) if err != nil { return fmt.Errorf("vpp.lb.sync-interval: %w", err) } if d <= 0 { return fmt.Errorf("vpp.lb.sync-interval must be > 0") } cfg.LB.SyncInterval = d } else { cfg.LB.SyncInterval = 30 * time.Second } // ipv4-src-address: optional here, but warned below if missing. if s := r.LB.IPv4SrcAddress; s != "" { ip := net.ParseIP(s) if ip == nil || ip.To4() == nil { return fmt.Errorf("vpp.lb.ipv4-src-address: %q is not a valid IPv4 address", s) } cfg.LB.IPv4SrcAddress = ip.To4() } // ipv6-src-address: optional here, but warned below if missing. if s := r.LB.IPv6SrcAddress; s != "" { ip := net.ParseIP(s) if ip == nil || ip.To4() != nil { return fmt.Errorf("vpp.lb.ipv6-src-address: %q is not a valid IPv6 address", s) } cfg.LB.IPv6SrcAddress = ip.To16() } // sticky-buckets-per-core: default 65536, must be power of 2. if p := r.LB.StickyBucketsPerCore; p != nil { n := *p if n == 0 || n&(n-1) != 0 { return fmt.Errorf("vpp.lb.sticky-buckets-per-core: %d must be a power of 2", n) } cfg.LB.StickyBucketsPerCore = n } else { cfg.LB.StickyBucketsPerCore = 65536 } // flow-timeout: default 40s, must be 1-120s inclusive and a whole number of seconds. if s := r.LB.FlowTimeout; s != "" { d, err := time.ParseDuration(s) if err != nil { return fmt.Errorf("vpp.lb.flow-timeout: %w", err) } if d%time.Second != 0 { return fmt.Errorf("vpp.lb.flow-timeout: %s must be a whole number of seconds", d) } if d < time.Second || d > 120*time.Second { return fmt.Errorf("vpp.lb.flow-timeout: %s out of range [1s, 120s]", d) } cfg.LB.FlowTimeout = d } else { cfg.LB.FlowTimeout = 40 * time.Second } // A missing src address is a hard error: VPP's GRE encap needs a source, // and every VIP we program uses GRE. Fail the config check so the // operator cannot start maglevd with a broken setup. 
if cfg.LB.IPv4SrcAddress == nil { return fmt.Errorf("vpp.lb.ipv4-src-address must be set; VPP GRE4 encap will fail for IPv4 VIPs") } if cfg.LB.IPv6SrcAddress == nil { return fmt.Errorf("vpp.lb.ipv6-src-address must be set; VPP GRE6 encap will fail for IPv6 VIPs") } return nil } func convertHealthCheck(r *rawHealthCheck) (HealthCheck, error) { h := HealthCheck{Type: r.Type, Port: r.Port} switch r.Type { case "icmp": // ICMP does not use ports. if r.Port != 0 { return HealthCheck{}, fmt.Errorf("type icmp does not use a port") } case "tcp": if r.Port == 0 { return HealthCheck{}, fmt.Errorf("type tcp requires port") } h.TCP = &TCPParams{ SSL: r.Params.SSL, ServerName: r.Params.ServerName, InsecureSkipVerify: r.Params.InsecureSkipVerify, } case "http", "https": if r.Port == 0 { return HealthCheck{}, fmt.Errorf("type %s requires port", r.Type) } if r.Params.Path == "" { return HealthCheck{}, fmt.Errorf("type http requires params.path") } min, max, err := parseCodeRange(r.Params.ResponseCode, 200) if err != nil { return HealthCheck{}, err } hp := &HTTPParams{ Path: r.Params.Path, Host: r.Params.Host, ResponseCodeMin: min, ResponseCodeMax: max, InsecureSkipVerify: r.Params.InsecureSkipVerify, } // TLS SNI: server-name takes precedence, falls back to host. 
hp.ServerName = r.Params.ServerName if hp.ServerName == "" { hp.ServerName = r.Params.Host } if r.Params.ResponseRegexp != "" { re, err := regexp.Compile(r.Params.ResponseRegexp) if err != nil { return HealthCheck{}, fmt.Errorf("invalid response-regexp %q: %w", r.Params.ResponseRegexp, err) } hp.ResponseRegexp = re } h.HTTP = hp default: return HealthCheck{}, fmt.Errorf("type must be \"icmp\", \"tcp\", \"http\", or \"https\", got %q", r.Type) } var err error if r.ProbeIPv4Src != "" { if h.ProbeIPv4Src, err = parseOptionalIPFamily(r.ProbeIPv4Src, 4, "probe-ipv4-src"); err != nil { return HealthCheck{}, err } } if r.ProbeIPv6Src != "" { if h.ProbeIPv6Src, err = parseOptionalIPFamily(r.ProbeIPv6Src, 6, "probe-ipv6-src"); err != nil { return HealthCheck{}, err } } if r.Interval == "" { return HealthCheck{}, fmt.Errorf("interval is required") } if h.Interval, err = time.ParseDuration(r.Interval); err != nil || h.Interval <= 0 { return HealthCheck{}, fmt.Errorf("interval %q must be a positive duration", r.Interval) } if r.FastInterval != "" { if h.FastInterval, err = time.ParseDuration(r.FastInterval); err != nil || h.FastInterval <= 0 { return HealthCheck{}, fmt.Errorf("fast-interval %q must be a positive duration", r.FastInterval) } } if r.DownInterval != "" { if h.DownInterval, err = time.ParseDuration(r.DownInterval); err != nil || h.DownInterval <= 0 { return HealthCheck{}, fmt.Errorf("down-interval %q must be a positive duration", r.DownInterval) } } if r.Timeout == "" { return HealthCheck{}, fmt.Errorf("timeout is required") } if h.Timeout, err = time.ParseDuration(r.Timeout); err != nil || h.Timeout <= 0 { return HealthCheck{}, fmt.Errorf("timeout %q must be a positive duration", r.Timeout) } h.Fall = r.Fall if h.Fall == 0 { h.Fall = 3 } if h.Fall < 1 { return HealthCheck{}, fmt.Errorf("fall must be >= 1") } h.Rise = r.Rise if h.Rise == 0 { h.Rise = 2 } if h.Rise < 1 { return HealthCheck{}, fmt.Errorf("rise must be >= 1") } return h, nil } func 
convertBackend(name string, r *rawBackend, hcs map[string]HealthCheck) (Backend, error) { ip := net.ParseIP(r.Address) if ip == nil { return Backend{}, fmt.Errorf("invalid address %q", r.Address) } b := Backend{ Address: ip, HealthCheck: r.HealthCheck, Enabled: boolDefault(r.Enabled, true), } if b.HealthCheck != "" { if _, ok := hcs[b.HealthCheck]; !ok { return Backend{}, fmt.Errorf("healthcheck %q not defined", b.HealthCheck) } } return b, nil } func convertFrontend(name string, r *rawFrontend, backends map[string]Backend) (Frontend, error) { fe := Frontend{ Description: r.Description, Protocol: r.Protocol, Port: r.Port, SrcIPSticky: r.SrcIPSticky, FlushOnDown: boolDefault(r.FlushOnDown, true), } ip := net.ParseIP(r.Address) if ip == nil { return Frontend{}, fmt.Errorf("invalid address %q", r.Address) } fe.Address = ip switch r.Protocol { case "", "tcp", "udp": default: return Frontend{}, fmt.Errorf("protocol must be \"tcp\", \"udp\", or omitted, got %q", r.Protocol) } if r.Port != 0 && r.Protocol == "" { return Frontend{}, fmt.Errorf("port requires protocol to be set") } if r.Protocol != "" && r.Port == 0 { return Frontend{}, fmt.Errorf("protocol %q requires port to be set (1-65535)", r.Protocol) } if len(r.Pools) == 0 { return Frontend{}, fmt.Errorf("pools must not be empty") } var firstFamily int firstBackend := true for pi, rp := range r.Pools { if rp.Name == "" { return Frontend{}, fmt.Errorf("pools[%d].name must not be empty", pi) } if len(rp.Backends) == 0 { return Frontend{}, fmt.Errorf("pool %q backends must not be empty", rp.Name) } pool := Pool{Name: rp.Name, Backends: make(map[string]PoolBackend, len(rp.Backends))} for bName, rpb := range rp.Backends { b, ok := backends[bName] if !ok { return Frontend{}, fmt.Errorf("pool %q backend %q not defined", rp.Name, bName) } fam := ipFamily(b.Address) if firstBackend { firstFamily = fam firstBackend = false } else if fam != firstFamily { return Frontend{}, fmt.Errorf("pool %q backend %q has different address 
family than first backend", rp.Name, bName) } w := intDefault(rpb.Weight, 100) if w < 0 || w > 100 { return Frontend{}, fmt.Errorf("pool %q backend %q weight %d out of range [0, 100]", rp.Name, bName, w) } pool.Backends[bName] = PoolBackend{Weight: w} } fe.Pools = append(fe.Pools, pool) } return fe, nil } // validateVIPFamilyConsistency walks cfg.Frontends, groups them by VIP // address, and rejects any group whose members disagree on the backend // address family used by their pools. See the call site in Parse for // why this matters (VPP LB plugin limitation). // // Each frontend already has its own within-frontend family invariant // (every backend in a frontend must share a family — enforced in // convertFrontend). This check adds the cross-frontend dimension: // frontends that happen to collide on the VIP address. func validateVIPFamilyConsistency(cfg *Config) error { type seen struct { family int frontendName string } byAddr := map[string]seen{} // Sort frontend names so the "first frontend on this address" // reported in errors is deterministic, independent of Go's // randomized map iteration. 
names := make([]string, 0, len(cfg.Frontends)) for name := range cfg.Frontends { names = append(names, name) } sort.Strings(names) for _, name := range names { fe := cfg.Frontends[name] fam := frontendBackendFamily(cfg, fe) if fam == 0 { continue // no valid backends; family is unknowable } addr := fe.Address.String() if prev, ok := byAddr[addr]; ok { if prev.family != fam { return fmt.Errorf( "frontend %q: VIP address %s is also used by frontend %q with IPv%d backends, "+ "but %q has IPv%d backends; VPP's LB plugin requires all VIPs sharing an "+ "address to use the same encap (backend family), so this config cannot be "+ "programmed — give the two frontends different VIP addresses", name, addr, prev.frontendName, prev.family, name, fam) } continue } byAddr[addr] = seen{family: fam, frontendName: name} } return nil } // frontendBackendFamily returns the address family (4 or 6) of the // first valid backend in the frontend's first pool. Returns 0 when no // backend is resolvable — convertFrontend already enforces that all // backends in a frontend share a family, so the first one is // authoritative. 
func frontendBackendFamily(cfg *Config, fe Frontend) int { if len(fe.Pools) == 0 { return 0 } for bName := range fe.Pools[0].Backends { if b, ok := cfg.Backends[bName]; ok && b.Address != nil { return ipFamily(b.Address) } } return 0 } // ---- helpers --------------------------------------------------------------- func parseOptionalIPFamily(s string, family int, field string) (net.IP, error) { if s == "" { return nil, nil } ip := net.ParseIP(s) if ip == nil { return nil, fmt.Errorf("%s %q is not a valid IP address", field, s) } if ipFamily(ip) != family { return nil, fmt.Errorf("%s %q must be an IPv%d address", field, s, family) } return ip, nil } func ipFamily(ip net.IP) int { if ip.To4() != nil { return 4 } return 6 } func parseCodeRange(s string, defaultCode int) (min, max int, err error) { if s == "" { return defaultCode, defaultCode, nil } if idx := strings.IndexByte(s, '-'); idx > 0 { min, err = strconv.Atoi(s[:idx]) if err != nil { return 0, 0, fmt.Errorf("invalid response-code range %q", s) } max, err = strconv.Atoi(s[idx+1:]) if err != nil { return 0, 0, fmt.Errorf("invalid response-code range %q", s) } return min, max, nil } min, err = strconv.Atoi(s) if err != nil { return 0, 0, fmt.Errorf("invalid response-code %q", s) } return min, min, nil } func boolDefault(p *bool, def bool) bool { if p == nil { return def } return *p } func intDefault(p *int, def int) int { if p == nil { return def } return *p }