SPA (cmd/frontend/web): - New "lb buckets" column backed by a 1s-debounced GetVPPLBState fetch loop with leading+trailing edge coalesce. - Per-frontend health icon (✅/⚠️/❗/‼️/❓) in the Zippy header, gated by a settling flag that suppresses ‼️ until the next lb-state reconciliation after a backend transition or weight change. - In-place leaf merge on lb-state so stable bucket values (e.g. "0") don't retrigger the Flash animation on every refresh. - Zippy cards remember open state in a cookie, default closed on fresh load; fixed-width frontend-title-name + reserved icon slot so headers line up across all cards. - Clock-drift watchdog in sse.ts that forces a fresh EventSource on laptop-wake so the broker emits a resync instead of hanging on a dead half-open socket. Frontend service (cmd/frontend): - maglevClient.lbStateLoop, trigger on backend transitions + vpp-connect, best-effort fetch on refreshAll. - Admin handlers explicitly wake the lb-state loop after lifecycle ops and set-weight (the latter emits no transition event on the maglevd side, so the WatchEvents path wouldn't have caught it). - /favicon.ico served from embedded web/public IPng logo. VPP integration: - internal/vpp/lbstate.go: dumpASesForVIP drops Pfx from the dump request (setting it silently wipes IPv4 replies in the LB plugin) and filters results by prefix on the response side instead, which also demuxes multi-VIP-on-same-port cases correctly. maglevc: - Walk now returns the unconsumed token tail; dispatch and the question listener reject unknown commands with a targeted error instead of dumping the full command tree prefixed with garbage. - On '?', echo the current line (including the '?') before the help list so the output reads like birdc. Checker / prober: - internal/checker: ±10% jitter on NextInterval so probes across restart don't all fire on the same tick. - internal/prober: HTTP User-Agent now carries the build version and project URL. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
162 lines · 4.8 KiB · Go
// Copyright (c) 2026, Pim van Pelt <pim@ipng.ch>
|
|
|
|
package prober
|
|
|
|
import (
	"context"
	"crypto/tls"
	"errors"
	"fmt"
	"io"
	"net"
	"net/http"
	"strconv"
	"strings"

	buildinfo "git.ipng.ch/ipng/vpp-maglev/cmd"
	"git.ipng.ch/ipng/vpp-maglev/internal/health"
)
|
|
|
|
var userAgent = "maglev-healthchecker/" + buildinfo.Version() + " (+https://git.ipng.ch/ipng/vpp-maglev)"
|
|
|
|
// HTTPProbe sends a plain HTTP GET to cfg.Target inside the healthcheck netns.
|
|
func HTTPProbe(ctx context.Context, cfg ProbeConfig) health.ProbeResult {
|
|
return doHTTPProbe(ctx, cfg, false)
|
|
}
|
|
|
|
// HTTPSProbe sends an HTTP GET over TLS to cfg.Target inside the healthcheck netns.
|
|
func HTTPSProbe(ctx context.Context, cfg ProbeConfig) health.ProbeResult {
|
|
return doHTTPProbe(ctx, cfg, true)
|
|
}
|
|
|
|
func doHTTPProbe(ctx context.Context, cfg ProbeConfig, useTLS bool) health.ProbeResult {
|
|
if cfg.HTTP == nil {
|
|
return health.ProbeResult{OK: false, Layer: health.LayerUnknown, Code: "UNKNOWN", Detail: "missing HTTP params"}
|
|
}
|
|
p := cfg.HTTP
|
|
|
|
port := cfg.Port
|
|
if port == 0 {
|
|
if useTLS {
|
|
port = 443
|
|
} else {
|
|
port = 80
|
|
}
|
|
}
|
|
|
|
// Always use "http" scheme: TLS (if any) is already applied to conn during
|
|
// the netns dial phase. Using "https" here would cause http.Transport to
|
|
// wrap conn in a second TLS layer, producing "http: server gave HTTP
|
|
// response to HTTPS client".
|
|
target := fmt.Sprintf("http://%s%s", net.JoinHostPort(cfg.Target.String(), strconv.Itoa(int(port))), p.Path)
|
|
|
|
hostHeader := p.Host
|
|
if hostHeader == "" {
|
|
hostHeader = cfg.Target.String()
|
|
}
|
|
|
|
// Dial (and optionally handshake) inside the healthcheck netns.
|
|
// The socket retains its netns after creation, so HTTP can be done outside.
|
|
var conn net.Conn
|
|
dialErr := inNetns(cfg.HealthCheckNetns, func() error {
|
|
dialer := &net.Dialer{Timeout: cfg.Timeout}
|
|
if cfg.ProbeSrc != nil {
|
|
dialer.LocalAddr = &net.TCPAddr{IP: cfg.ProbeSrc}
|
|
}
|
|
c, err := dialer.DialContext(ctx, "tcp", net.JoinHostPort(cfg.Target.String(), strconv.Itoa(int(port))))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if useTLS {
|
|
tlsConn := tls.Client(c, tlsConfig(p.ServerName, p.InsecureSkipVerify))
|
|
if err := tlsConn.HandshakeContext(ctx); err != nil {
|
|
c.Close()
|
|
return err
|
|
}
|
|
conn = tlsConn
|
|
} else {
|
|
conn = c
|
|
}
|
|
return nil
|
|
})
|
|
if dialErr != nil {
|
|
if isTimeout(dialErr) {
|
|
return health.ProbeResult{OK: false, Layer: health.LayerL4, Code: "L4TOUT", Detail: dialErr.Error()}
|
|
}
|
|
// Distinguish TLS handshake failures (L6) from TCP connect failures (L4).
|
|
// conn is non-nil only when TCP succeeded but TLS handshake failed.
|
|
if useTLS && conn == nil && isTLSError(dialErr) {
|
|
if isTimeout(dialErr) {
|
|
return health.ProbeResult{OK: false, Layer: health.LayerL6, Code: "L6TOUT", Detail: dialErr.Error()}
|
|
}
|
|
return health.ProbeResult{OK: false, Layer: health.LayerL6, Code: "L6RSP", Detail: dialErr.Error()}
|
|
}
|
|
return health.ProbeResult{OK: false, Layer: health.LayerL4, Code: "L4CON", Detail: dialErr.Error()}
|
|
}
|
|
defer conn.Close()
|
|
|
|
transport := &http.Transport{
|
|
DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
|
|
return conn, nil
|
|
},
|
|
DisableKeepAlives: true,
|
|
}
|
|
client := &http.Client{
|
|
Transport: transport,
|
|
Timeout: cfg.Timeout,
|
|
CheckRedirect: func(_ *http.Request, _ []*http.Request) error {
|
|
return http.ErrUseLastResponse // never follow redirects
|
|
},
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, target, nil)
|
|
if err != nil {
|
|
return health.ProbeResult{OK: false, Layer: health.LayerL7, Code: "L7RSP", Detail: err.Error()}
|
|
}
|
|
req.Host = hostHeader
|
|
req.Header.Set("User-Agent", userAgent)
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
if isTimeout(err) {
|
|
return health.ProbeResult{OK: false, Layer: health.LayerL7, Code: "L7TOUT", Detail: err.Error()}
|
|
}
|
|
return health.ProbeResult{OK: false, Layer: health.LayerL7, Code: "L7RSP", Detail: err.Error()}
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode < p.ResponseCodeMin || resp.StatusCode > p.ResponseCodeMax {
|
|
return health.ProbeResult{
|
|
OK: false,
|
|
Layer: health.LayerL7,
|
|
Code: "L7STS",
|
|
Detail: fmt.Sprintf("HTTP %d (want %d-%d)", resp.StatusCode, p.ResponseCodeMin, p.ResponseCodeMax),
|
|
}
|
|
}
|
|
|
|
if p.ResponseRegexp != nil {
|
|
body, err := io.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return health.ProbeResult{OK: false, Layer: health.LayerL7, Code: "L7TOUT", Detail: err.Error()}
|
|
}
|
|
if !p.ResponseRegexp.Match(body) {
|
|
return health.ProbeResult{
|
|
OK: false,
|
|
Layer: health.LayerL7,
|
|
Code: "L7RSP",
|
|
Detail: fmt.Sprintf("body did not match regexp %q", p.ResponseRegexp),
|
|
}
|
|
}
|
|
}
|
|
|
|
return health.ProbeResult{OK: true, Layer: health.LayerL7, Code: "L7OK"}
|
|
}
|
|
|
|
// isTLSError returns true if err originated from the TLS layer.
|
|
func isTLSError(err error) bool {
|
|
if err == nil {
|
|
return false
|
|
}
|
|
_, ok := err.(tls.AlertError)
|
|
return ok || strings.Contains(err.Error(), "tls:")
|
|
}
|