Files
vpp-maglev/cmd/tester/main.go
Pim van Pelt 6293521157 New maglevt TUI component: out-of-band VIP health monitor
A small bubbletea TUI that reads maglev.yaml (repeatable --config),
enumerates every VIP, and probes each from outside the load balancer
on a tight cadence (default 100ms, ±10% jitter). HTTP/HTTPS VIPs get
a GET against a configurable URI (default /.well-known/ipng/healthz)
with per-VIP rolling latency (p50/p95/p99/max), lifetime N/FAIL
counters, LAST status, and a response-header tally. Non-HTTP VIPs
get a TCP connect probe. A bounded error panel classifies anomalies
as timeout / http-err / net-err / spike and auto-sizes to fill the
screen.

Utility: during a failover drill (backend flap, AS drain, config
push) the tally panel shows which backend each VIP is actually
steering to, with two-colour activity highlighting over a 5s
window — white = receiving traffic, grey = drained. Paired with
the rolling OK%/latency columns it gives an at-a-glance answer to
"is the VIP healthy from the outside right now, and which backend
is it hitting", without relying on maglevd's own view of the
world.

Also bumps Makefile/go.mod to build the new binary.
2026-04-15 01:23:52 +02:00

306 lines
10 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright (c) 2026, Pim van Pelt <pim@ipng.ch>
// maglevt is a tiny TUI that reads maglev.yaml, enumerates every VIP
// and hits it on a tight cadence (default 100ms) from outside the load
// balancer. HTTP/HTTPS VIPs get a GET request with per-VIP rolling
// latency stats, success/failure ratios, and a running tally of a
// configurable response header (default: X-IPng-Frontend) so pool-
// failover events show up as visible reshuffles in the tally. Non-HTTP
// VIPs get a plain TCP-connect probe for liveness. See maglevt --help
// for the flag surface.
package main
import (
"context"
"flag"
"fmt"
"net"
"os"
"regexp"
"sort"
"strings"
"time"
tea "github.com/charmbracelet/bubbletea"
buildinfo "git.ipng.ch/ipng/vpp-maglev/cmd"
"git.ipng.ch/ipng/vpp-maglev/internal/config"
)
// main delegates all the work to run and converts a returned error
// into a "maglevt: ..." line on stderr plus exit status 1.
func main() {
	err := run()
	if err == nil {
		return
	}
	fmt.Fprintf(os.Stderr, "maglevt: %v\n", err)
	os.Exit(1)
}
// run is the body of the program: it parses the command line, loads
// every requested config file, builds the deduplicated VIP list,
// starts the TUI together with one probe goroutine and one DNS lookup
// goroutine per VIP, and returns whatever error the TUI finished with.
func run() error {
	// Default probe path. --uri and --path are bound to the same
	// variable below, so whichever appears last on the command line
	// is the one that takes effect.
	const defaultURI = "/.well-known/ipng/healthz"

	var (
		cfgPaths    multiFlag
		interval    time.Duration
		timeout     time.Duration
		hostHeader  string
		uri         string
		tallyHeader string
		insecure    bool
		keepalive   bool
		filter      string
		showVersion bool
	)
	flag.Var(&cfgPaths, "config", "path to maglev.yaml (repeatable; also accepts a comma-separated list). Frontends are unioned across files, deduplicated by (address, protocol, port).")
	flag.DurationVar(&interval, "interval", 100*time.Millisecond, "probe interval per VIP (±10% jitter)")
	flag.DurationVar(&timeout, "timeout", 2*time.Second, "per-request timeout")
	flag.StringVar(&hostHeader, "host", "", "Host header override (default: VIP address literal)")
	flag.StringVar(&uri, "uri", defaultURI, "HTTP request path (URI) used in the GET request")
	flag.StringVar(&uri, "path", defaultURI, "alias for --uri")
	flag.StringVar(&tallyHeader, "header", "X-IPng-Frontend", "response header to extract and tally")
	flag.BoolVar(&insecure, "insecure", true, "skip TLS verification for HTTPS")
	flag.BoolVar(&keepalive, "keepalive", false, "enable HTTP keep-alives (disabled by default so each probe opens a fresh connection — required for failover visibility)")
	flag.BoolVar(&keepalive, "k", false, "shorthand for --keepalive")
	flag.StringVar(&filter, "filter", "", "only probe frontends whose name matches this regex")
	flag.BoolVar(&showVersion, "version", false, "print version and exit")
	flag.Parse()

	if showVersion {
		fmt.Printf("maglevt %s (commit %s, built %s)\n",
			buildinfo.Version(), buildinfo.Commit(), buildinfo.Date())
		return nil
	}

	// No --config given: fall back to the system-wide location.
	if len(cfgPaths) == 0 {
		cfgPaths = multiFlag{"/etc/vpp-maglev/maglev.yaml"}
	}

	// Any config that fails its check aborts the run. Continuing with
	// the remaining files would silently shrink the probed VIP set.
	configs := make([]*config.Config, 0, len(cfgPaths))
	for _, p := range cfgPaths {
		cfg, res := config.Check(p)
		switch {
		case res.OK():
			configs = append(configs, cfg)
		case res.ParseError != "":
			return fmt.Errorf("config parse %s: %s", p, res.ParseError)
		default:
			return fmt.Errorf("config semantic %s: %s", p, res.SemanticError)
		}
	}

	// A nil filterRe means "no filtering" to buildVIPsUnion.
	var filterRe *regexp.Regexp
	if filter != "" {
		re, err := regexp.Compile(filter)
		if err != nil {
			return fmt.Errorf("invalid --filter regex: %w", err)
		}
		filterRe = re
	}

	opts := probeOpts{
		Interval:  interval,
		Timeout:   timeout,
		Host:      hostHeader,
		Path:      uri,
		Header:    tallyHeader,
		Insecure:  insecure,
		KeepAlive: keepalive,
	}

	pathList := strings.Join(cfgPaths, ", ")
	vips := buildVIPsUnion(configs, cfgPaths, filterRe, opts)
	if len(vips) == 0 {
		return fmt.Errorf("no matching frontends in %s", pathList)
	}

	model := Model{
		cfgPath: pathList,
		vips:    vips,
		opts:    opts,
		startAt: time.Now(),
		showDNS: true,
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	prog := tea.NewProgram(model, tea.WithAltScreen())

	// Two goroutines per VIP: the probe loop and a DNS lookup. Both
	// receive prog.Send so they can hand their results to the TUI,
	// and both receive ctx so they can be told to stop.
	for i := range vips {
		info := vips[i].info
		go runProbeLoop(ctx, info, opts, prog.Send)
		go runDNSLookup(ctx, info, prog.Send)
	}

	_, err := prog.Run()

	// Cancel explicitly, then pause briefly so the workers get a
	// chance to notice the cancellation before the process exits.
	// This is a best-effort courtesy, not a synchronisation point.
	cancel()
	time.Sleep(50 * time.Millisecond)
	return err
}
// buildVIPsUnion merges the frontends of every config in cfgs into
// one probe list without duplicates.
//
// Two frontends are the same VIP when their (address string, scheme,
// port) tuples are equal; the first one encountered is kept and later
// ones are skipped. Configs are walked in the order given, and within
// a config the frontend names are visited in sorted order. The
// frontend name is used only to apply filterRe (nil means "keep
// everything") and is not stored on the result.
//
// Frontends whose protocol is not "tcp" (compared case-insensitively)
// or whose port is 0 are skipped. For schemes "http" and "https" the
// probe URL and an HTTP client are prepared up front; every other VIP
// is left without them.
//
// The returned slice is sorted IPv6 first, then by descending port,
// then by address string, and each entry's info.idx is set to its
// final position in that slice. cfgPaths is accepted but not used.
func buildVIPsUnion(cfgs []*config.Config, cfgPaths []string, filterRe *regexp.Regexp, opts probeOpts) []*vipState {
	_ = cfgPaths // not consulted yet; kept so the signature stays stable

	type tuple struct {
		ip     string
		scheme string
		port   uint16
	}
	claimed := make(map[tuple]struct{})
	vips := make([]*vipState, 0)

	for _, cfg := range cfgs {
		// Map iteration order is random, so collect and sort the
		// names to visit frontends deterministically.
		feNames := make([]string, 0, len(cfg.Frontends))
		for n := range cfg.Frontends {
			feNames = append(feNames, n)
		}
		sortStringsInPlace(feNames)

		for _, n := range feNames {
			if filterRe != nil && !filterRe.MatchString(n) {
				continue
			}
			fe := cfg.Frontends[n]
			if strings.ToLower(fe.Protocol) != "tcp" {
				continue
			}
			if fe.Port == 0 {
				continue
			}

			scheme := schemeForPort(fe.Port)
			id := tuple{ip: fe.Address.String(), scheme: scheme, port: fe.Port}
			if _, dup := claimed[id]; dup {
				continue // an earlier frontend already owns this tuple
			}
			claimed[id] = struct{}{}

			info := &vipInfo{
				idx:    len(vips),
				scheme: scheme,
				ip:     fe.Address,
				port:   fe.Port,
			}
			switch scheme {
			case "http", "https":
				info.url = buildURL(scheme, fe.Address, fe.Port, opts.Path)
				info.client = newHTTPClient(opts)
			}

			vips = append(vips, &vipState{
				info:     info,
				rolling:  newRolling(),
				tally:    map[string]int{},
				tallyOld: map[string]int{},
				tallyNew: map[string]int{},
			})
		}
	}

	// Presentation order: address family (IPv6 on top), then port
	// from high to low, then the textual address as a tiebreaker.
	sort.SliceStable(vips, func(a, b int) bool {
		left, right := vips[a].info, vips[b].info
		leftV6, rightV6 := left.ip.To4() == nil, right.ip.To4() == nil
		switch {
		case leftV6 != rightV6:
			return leftV6
		case left.port != right.port:
			return left.port > right.port
		default:
			return left.ip.String() < right.ip.String()
		}
	})

	// Sorting moved entries around, so rewrite idx to match the
	// position each VIP ended up in.
	for slot := range vips {
		vips[slot].info.idx = slot
	}
	return vips
}
// multiFlag is a flag.Value that collects every occurrence of a
// repeatable flag into a slice. A single occurrence may itself carry
// a comma-separated list, so `--config a.yaml,b.yaml` and
// `--config a.yaml --config b.yaml` yield the same slice.
type multiFlag []string

// String renders the collected values as one comma-separated string.
// The flag package documents that String may be invoked on a
// zero-valued receiver such as a nil pointer, so a nil receiver is
// reported as the empty string instead of being dereferenced.
func (m *multiFlag) String() string {
	if m == nil {
		return ""
	}
	return strings.Join(*m, ",")
}

// Set appends the values found in v. v is split on commas, each piece
// is trimmed of surrounding whitespace, and empty pieces are dropped.
// It never fails; the error return exists to satisfy flag.Value.
func (m *multiFlag) Set(v string) error {
	for _, p := range strings.Split(v, ",") {
		if p = strings.TrimSpace(p); p != "" {
			*m = append(*m, p)
		}
	}
	return nil
}
// schemeForPort maps a VIP's TCP port to the probe scheme: 80 is
// "http", 443 is "https", and every other port is "tcp", which
// callers treat as a connect-only probe. The mapping is kept
// deliberately small so that HTTP requests are only ever aimed at
// ports that are unambiguously web; further ports (e.g. 8080/8443)
// can be added later if the defaults should widen.
func schemeForPort(port uint16) string {
	if port == 80 {
		return "http"
	}
	if port == 443 {
		return "https"
	}
	return "tcp"
}
// buildURL assembles the probe URL "scheme://host:port/path" for an
// HTTP/HTTPS VIP.
//
// Any address for which ip.To4() returns nil is treated as IPv6 and
// wrapped in square brackets per RFC 3986 §3.2.2, so the colons in
// the literal are not mistaken for the port separator.
//
// path is normalised to begin with "/": an empty path becomes "/",
// and a path given without the leading slash (e.g. "healthz") gets
// one prepended. Without that, the path would be glued directly onto
// the port number and produce a URL that cannot be parsed.
func buildURL(scheme string, ip net.IP, port uint16, path string) string {
	host := ip.String()
	if ip.To4() == nil {
		host = "[" + host + "]"
	}
	if !strings.HasPrefix(path, "/") {
		path = "/" + path
	}
	return fmt.Sprintf("%s://%s:%d%s", scheme, host, port, path)
}
// sortStringsInPlace sorts s in ascending order, modifying the slice
// it is given. It is used by buildVIPsUnion to visit frontend names
// in a deterministic order. The file already depends on package sort,
// so this simply delegates to sort.Strings; a nil or empty slice is
// left untouched.
func sortStringsInPlace(s []string) {
	sort.Strings(s)
}