Replace the cfgPath field in the TUI header with the system's fully-qualified hostname via gethostname + CNAME lookup, matching what `hostname -f` produces. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
324 lines
11 KiB
Go
324 lines
11 KiB
Go
// SPDX-License-Identifier: Apache-2.0
|
||
|
||
// maglevt is a tiny TUI that reads maglev.yaml, enumerates every VIP
|
||
// and hits it on a tight cadence (default 100ms) from outside the load
|
||
// balancer. HTTP/HTTPS VIPs get a HEAD request with per-VIP rolling
|
||
// latency stats, success/failure ratios, and a running tally of a
|
||
// configurable response header (default: X-IPng-Frontend) so pool-
|
||
// failover events show up as visible reshuffles in the tally. Non-HTTP
|
||
// VIPs get a plain TCP-connect probe for liveness. See maglevt --help
|
||
// for the flag surface.
|
||
package main
|
||
|
||
import (
|
||
"context"
|
||
"flag"
|
||
"fmt"
|
||
"net"
|
||
"os"
|
||
"regexp"
|
||
"sort"
|
||
"strings"
|
||
"time"
|
||
|
||
tea "github.com/charmbracelet/bubbletea"
|
||
|
||
buildinfo "git.ipng.ch/ipng/vpp-maglev/cmd"
|
||
"git.ipng.ch/ipng/vpp-maglev/internal/config"
|
||
)
|
||
|
||
func main() {
|
||
if err := run(); err != nil {
|
||
fmt.Fprintf(os.Stderr, "maglevt: %v\n", err)
|
||
os.Exit(1)
|
||
}
|
||
}
|
||
|
||
func run() error {
|
||
var cfgPaths multiFlag
|
||
flag.Var(&cfgPaths, "config", "path to maglev.yaml (repeatable; also accepts a comma-separated list). Frontends are unioned across files, deduplicated by (address, protocol, port).")
|
||
interval := flag.Duration("interval", 100*time.Millisecond, "probe interval per VIP (±10% jitter)")
|
||
timeout := flag.Duration("timeout", 2*time.Second, "per-request timeout")
|
||
host := flag.String("host", "", "Host header override (default: VIP address literal)")
|
||
// Default probe URI: a small, deliberate health-check path that
|
||
// typically returns 204 No Content and doesn't hit the backend
|
||
// app logs. /.well-known/ipng/healthz is the convention for
|
||
// IPng deployments; override with --uri for anything else.
|
||
// --path is registered as a synonym for backward compatibility
|
||
// with the pre-1.0 flag name — both set the same variable, so
|
||
// whichever the operator types last on the command line wins.
|
||
const defaultURI = "/.well-known/ipng/healthz"
|
||
path := flag.String("uri", defaultURI, "HTTP request path (URI) used in the GET request")
|
||
flag.StringVar(path, "path", defaultURI, "alias for --uri")
|
||
header := flag.String("header", "X-IPng-Frontend", "response header to extract and tally")
|
||
insecure := flag.Bool("insecure", true, "skip TLS verification for HTTPS")
|
||
keepalive := flag.Bool("keepalive", false, "enable HTTP keep-alives (disabled by default so each probe opens a fresh connection — required for failover visibility)")
|
||
flag.BoolVar(keepalive, "k", false, "shorthand for --keepalive")
|
||
filter := flag.String("filter", "", "only probe frontends whose name matches this regex")
|
||
printVersion := flag.Bool("version", false, "print version and exit")
|
||
flag.Parse()
|
||
|
||
if *printVersion {
|
||
fmt.Printf("maglevt %s (commit %s, built %s)\n",
|
||
buildinfo.Version(), buildinfo.Commit(), buildinfo.Date())
|
||
return nil
|
||
}
|
||
|
||
if len(cfgPaths) == 0 {
|
||
cfgPaths = multiFlag{"/etc/vpp-maglev/maglev.yaml"}
|
||
}
|
||
|
||
// Load every requested config. A parse/semantic error on any of
|
||
// them is fatal — we want the user to see it rather than silently
|
||
// probing a reduced set of VIPs because one file was broken.
|
||
configs := make([]*config.Config, 0, len(cfgPaths))
|
||
for _, p := range cfgPaths {
|
||
cfg, res := config.Check(p)
|
||
if !res.OK() {
|
||
if res.ParseError != "" {
|
||
return fmt.Errorf("config parse %s: %s", p, res.ParseError)
|
||
}
|
||
return fmt.Errorf("config semantic %s: %s", p, res.SemanticError)
|
||
}
|
||
configs = append(configs, cfg)
|
||
}
|
||
|
||
var filterRe *regexp.Regexp
|
||
if *filter != "" {
|
||
var err error
|
||
filterRe, err = regexp.Compile(*filter)
|
||
if err != nil {
|
||
return fmt.Errorf("invalid --filter regex: %w", err)
|
||
}
|
||
}
|
||
|
||
opts := probeOpts{
|
||
Interval: *interval,
|
||
Timeout: *timeout,
|
||
Host: *host,
|
||
Path: *path,
|
||
Header: *header,
|
||
Insecure: *insecure,
|
||
KeepAlive: *keepalive,
|
||
}
|
||
|
||
vips := buildVIPsUnion(configs, cfgPaths, filterRe, opts)
|
||
if len(vips) == 0 {
|
||
return fmt.Errorf("no matching frontends in %s", strings.Join(cfgPaths, ", "))
|
||
}
|
||
|
||
m := Model{
|
||
host: fqdn(),
|
||
vips: vips,
|
||
opts: opts,
|
||
startAt: time.Now(),
|
||
showDNS: true,
|
||
}
|
||
|
||
ctx, cancel := context.WithCancel(context.Background())
|
||
defer cancel()
|
||
|
||
prog := tea.NewProgram(m, tea.WithAltScreen())
|
||
|
||
// Spawn one probe goroutine per VIP. Each sends probeResultMsg
|
||
// into the tea.Program via prog.Send, which is thread-safe.
|
||
// Alongside the prober we kick off a one-shot reverse-DNS
|
||
// lookup so the 'd' toggle has a hostname to display; the
|
||
// lookup is best-effort and simply drops on timeout or NXDOMAIN.
|
||
for _, v := range vips {
|
||
go runProbeLoop(ctx, v.info, opts, prog.Send)
|
||
go runDNSLookup(ctx, v.info, prog.Send)
|
||
}
|
||
|
||
_, err := prog.Run()
|
||
cancel()
|
||
// Give the workers a beat to observe ctx and exit. This isn't
|
||
// strictly required — the process is exiting anyway — but a clean
|
||
// shutdown avoids the "unexpected EOF writing to a closed
|
||
// transport" spam that HTTP clients sometimes emit on ctrl-C.
|
||
time.Sleep(50 * time.Millisecond)
|
||
return err
|
||
}
|
||
|
||
// fqdn returns the system's fully-qualified hostname: gethostname(2)
// via os.Hostname() for the short name, then a CNAME lookup to reach
// the canonical form the resolver would hand back — the same two-step
// dance `hostname -f` performs. A trailing dot on the resolver's
// answer is stripped. It falls back to the short name when the
// resolver errors, times out, or has nothing to add, and to the
// literal "unknown" when the hostname itself cannot be read, so the
// TUI header always renders.
func fqdn() string {
	// Upper bound on the resolver round-trip. fqdn runs before the
	// TUI starts, so an unreachable DNS server must not be able to
	// hold up startup indefinitely.
	const lookupTimeout = 2 * time.Second

	short, err := os.Hostname()
	if err != nil {
		return "unknown"
	}

	ctx, cancel := context.WithTimeout(context.Background(), lookupTimeout)
	defer cancel()

	cname, err := net.DefaultResolver.LookupCNAME(ctx, short)
	if err != nil {
		return short
	}
	if canonical := strings.TrimSuffix(cname, "."); canonical != "" {
		return canonical
	}
	return short
}

// buildVIPsUnion flattens frontends from multiple configs into a
// single deduplicated probe list, keyed by the (scheme, address,
// port) tuple. The typical use case is a pair of maglevds fronting
// the same two VIPs (vip0 / vip1, IPv4 + IPv6, × port 80 + 443 = 8
// probers) — the operator passes both yaml files and maglevt unions
// them so the probe grid doesn't grow duplicates from mirrored
// configs. The symbolic frontend name from yaml is intentionally
// dropped: when two files use the same name for different tuples
// (common in cross-deployment comparisons) the name would be
// ambiguous, and the tuple is the only stable identity. Only the
// --filter regex still uses the name, as a pre-dedup match.
//
// Frontends whose protocol is not TCP, or whose port is 0, are
// skipped.
//
// Dedup key uses net.IP.String() which canonicalises IPv6 zero-
// compression, so 2001:db8::1 and 2001:db8:0:0:0:0:0:1 collapse
// onto one entry. Within a file, frontends are visited in
// name-sorted order; across files, the first occurrence of each
// tuple wins the dedup. The returned slice is then sorted for
// display (IPv6 before IPv4, higher ports first, address string as
// the tiebreaker), so the layout is deterministic across runs.
func buildVIPsUnion(cfgs []*config.Config, cfgPaths []string, filterRe *regexp.Regexp, opts probeOpts) []*vipState {
|
||
_ = cfgPaths // reserved for future diagnostics (e.g. which file this tuple came from)
|
||
type key struct {
|
||
ip string
|
||
scheme string
|
||
port uint16
|
||
}
|
||
seen := map[key]*vipState{}
|
||
var order []key
|
||
|
||
for _, cfg := range cfgs {
|
||
names := make([]string, 0, len(cfg.Frontends))
|
||
for name := range cfg.Frontends {
|
||
names = append(names, name)
|
||
}
|
||
sortStringsInPlace(names)
|
||
|
||
for _, name := range names {
|
||
fe := cfg.Frontends[name]
|
||
if filterRe != nil && !filterRe.MatchString(name) {
|
||
continue
|
||
}
|
||
if strings.ToLower(fe.Protocol) != "tcp" || fe.Port == 0 {
|
||
continue
|
||
}
|
||
scheme := schemeForPort(fe.Port)
|
||
k := key{ip: fe.Address.String(), scheme: scheme, port: fe.Port}
|
||
if _, ok := seen[k]; ok {
|
||
continue // already claimed by an earlier file
|
||
}
|
||
info := &vipInfo{
|
||
idx: len(order),
|
||
scheme: scheme,
|
||
ip: fe.Address,
|
||
port: fe.Port,
|
||
}
|
||
if scheme == "http" || scheme == "https" {
|
||
info.url = buildURL(scheme, fe.Address, fe.Port, opts.Path)
|
||
info.client = newHTTPClient(opts)
|
||
}
|
||
v := &vipState{
|
||
info: info,
|
||
rolling: newRolling(),
|
||
tally: map[string]int{},
|
||
tallyOld: map[string]int{},
|
||
tallyNew: map[string]int{},
|
||
}
|
||
seen[k] = v
|
||
order = append(order, k)
|
||
}
|
||
}
|
||
|
||
out := make([]*vipState, len(order))
|
||
for i, k := range order {
|
||
out[i] = seen[k]
|
||
}
|
||
// Display order: IPv6 before IPv4, higher ports before lower
|
||
// within each address family, then address string as a final
|
||
// tiebreaker for determinism across runs. HTTPS :443 sitting
|
||
// above HTTP :80 matches the "secure first" reading order most
|
||
// operators expect, and clustering all the IPv6 rows at the top
|
||
// keeps a mixed-family deployment visually coherent as the
|
||
// operator scans down the table.
|
||
sort.SliceStable(out, func(i, j int) bool {
|
||
vi, vj := out[i].info, out[j].info
|
||
iIs6 := vi.ip.To4() == nil
|
||
jIs6 := vj.ip.To4() == nil
|
||
if iIs6 != jIs6 {
|
||
return iIs6
|
||
}
|
||
if vi.port != vj.port {
|
||
return vi.port > vj.port
|
||
}
|
||
return vi.ip.String() < vj.ip.String()
|
||
})
|
||
// Re-index after the sort so info.idx matches the slot each VIP
|
||
// now occupies in out — probeResultMsg.VIPIdx is looked up via
|
||
// this index in Model.Update, so they must agree.
|
||
for i, v := range out {
|
||
v.info.idx = i
|
||
}
|
||
return out
|
||
}
|
||
|
||
// multiFlag is a flag.Value that collects every occurrence of a
// repeatable flag into one slice. A single occurrence may itself
// carry a comma-separated list, so `--config a.yaml,b.yaml` and
// `--config a.yaml --config b.yaml` end up with the same contents.
type multiFlag []string

// String renders the collected values joined by commas.
func (m *multiFlag) String() string {
	return strings.Join([]string(*m), ",")
}

// Set appends the entries found in v. The value is split on commas,
// each piece is trimmed of surrounding whitespace, and pieces that
// are empty after trimming are dropped. It always returns nil.
func (m *multiFlag) Set(v string) error {
	isComma := func(r rune) bool { return r == ',' }
	for _, field := range strings.FieldsFunc(v, isComma) {
		if trimmed := strings.TrimSpace(field); trimmed != "" {
			*m = append(*m, trimmed)
		}
	}
	return nil
}
|
||
|
||
// schemeForPort maps a VIP's TCP port to the probe scheme: "http"
// for 80, "https" for 443, and "tcp" (connect-only probe) for every
// other port. The mapping is intentionally narrow — under-classifying
// is preferred over sending an HTTP request at a non-web VIP such as
// IMAPS and spewing protocol errors into the logs. More ports (e.g.
// 8080/8443) can be added later, but the defaults should stay
// conservative.
func schemeForPort(port uint16) string {
	if port == 80 {
		return "http"
	}
	if port == 443 {
		return "https"
	}
	return "tcp"
}
|
||
|
||
// buildURL constructs the probe URL for an HTTP/HTTPS VIP in the form
// scheme://host:port/path.
//
// Addresses for which ip.To4() is nil are treated as IPv6 literals
// and bracketed per RFC 3986 §3.2.2, so the colons in the address
// aren't confused with the port separator. The path is normalised to
// begin with "/": an empty path becomes "/", and a path given without
// the leading slash (e.g. "healthz") has one prepended — otherwise it
// would be glued straight onto the port and produce a malformed URL.
func buildURL(scheme string, ip net.IP, port uint16, path string) string {
	host := ip.String()
	if ip.To4() == nil {
		host = "[" + host + "]"
	}
	if !strings.HasPrefix(path, "/") {
		path = "/" + path
	}
	return fmt.Sprintf("%s://%s:%d%s", scheme, host, port, path)
}
|
||
|
||
// sortStringsInPlace sorts s in ascending order, in place. It is kept
// as a named helper for its existing callers and simply defers to
// sort.Strings: this file already imports "sort" for
// sort.SliceStable, so a hand-rolled insertion sort no longer saves
// an import.
func sortStringsInPlace(s []string) {
	sort.Strings(s)
}
|