Files
vpp-maglev/internal/prober/http.go
Pim van Pelt d612086a5f Pools, CLI, versioning, Debian packaging, HTTPS fix
- Replaced flat `backends: [...]` list on frontends with an ordered `pools:`
  list; each pool has a name and a map of backends with per-pool weights (0–100,
  default 100). Pools express priority: first pool with a healthy backend wins.
- Removed global backend weight (was on the backend, now lives in the pool).
- Config validation enforces non-empty pools, non-empty pool names, weight
  range, and consistent address families across all pools of a frontend.

- Added `PoolBackendInfo { name, weight }` and changed `PoolInfo.backends` from
  `repeated string` to `repeated PoolBackendInfo` so weights are visible over
  the API.

- Full interactive shell with readline, tab completion, and `?` inline help.
- Command tree parser (Walk) handles fixed keywords and dynamic slot nodes;
  prefix matching with exact-match priority.
- Commands: `show version/frontends/frontend/backends/backend/healthchecks/
  healthcheck`, `set backend <name> pause|resume`, `quit`/`exit`.
- `show frontend` output is hierarchical (pools → backends) with per-backend
  weights and `[disabled]` notation; pool section uses fixed-width formatting
  so ANSI color codes don't corrupt tabwriter alignment.
- `-color` flag (default true) wraps static field labels in dark-blue ANSI;
  works correctly with tabwriter because all labels carry identical-length
  escape sequences.

- `cmd/version.go` package holds `version`, `commit`, `date` vars set at build
  time via `-ldflags -X`.
- `make build` / `make build-amd64` / `make build-arm64` all inject
  `VERSION=0.1.1`, `COMMIT_HASH` (from `git rev-parse --short HEAD`), and
  `DATE` (UTC ISO-8601).
- `maglevc` prints version on interactive startup and exposes `show version`.
- `maglevd` logs version/commit/date at startup; `-version` flag prints and exits.

- `doHTTPProbe` was building a `https://` target URL even though TLS was already
  applied to the connection inside `inNetns`. `http.Transport` then wrapped the
  connection in a second TLS layer, producing "http: server gave HTTP response
  to HTTPS client". Fixed by always using `http://` in the target URL.
- Added `TestHTTPSProbe` using `httptest.NewTLSServer` to cover the full path.

- New `docs/user-guide.md`: maglevd flags/signals, maglevc commands, shell
  completion, and command-tree parser walkthrough.
- New `docs/healthchecks.md`: state machine, rise/fall model, probe intervals,
  all transition events with log examples.
- Updated `docs/config-guide.md`: pools design, removed global weight from
  backends, updated all examples.
- Updated `README.md`: packaging table, build paths, corrected binary locations
  (`/usr/sbin/maglevd`), config filename (`.yaml`).

- `debian/` directory contains `control.in`, `maglevd.service`, `default.maglev`,
  `maglev.yaml` (example config), `conffiles`, `postinst`, `prerm`.
- `debian/build-deb.sh` stages a package tree and calls `dpkg-deb`; emits
  `build/vpp-maglev_<version>~<commit>_<arch>.deb`.
- Cross-compiles for amd64 and arm64 in one `make pkg-deb` invocation.
- `maglevd` installed to `/usr/sbin/`, `maglevc` to `/usr/bin/`.
- Service reads `MAGLEV_CONFIG` from `/etc/default/maglev`
  (default: `/etc/maglev/maglev.yaml`).
- Man pages `maglevd(8)` and `maglevc(1)` live in `docs/` and are gzip'd into
  the package.
- All build output goes to `build/<arch>/`; `build/` is gitignored.
2026-04-11 12:18:17 +02:00

159 lines
4.7 KiB
Go

// Copyright (c) 2026, Pim van Pelt <pim@ipng.ch>
package prober
import (
"context"
"crypto/tls"
"fmt"
"io"
"net"
"net/http"
"strconv"
"strings"
"git.ipng.ch/ipng/vpp-maglev/internal/health"
)
// HTTPProbe runs an HTTP GET health check against cfg.Target without TLS.
// It is a thin wrapper around doHTTPProbe, which uses port 80 when cfg.Port
// is zero, and returns that function's result unchanged.
func HTTPProbe(ctx context.Context, cfg ProbeConfig) health.ProbeResult {
	const useTLS = false
	return doHTTPProbe(ctx, cfg, useTLS)
}
// HTTPSProbe runs an HTTP GET health check against cfg.Target over TLS.
// It is a thin wrapper around doHTTPProbe, which uses port 443 when cfg.Port
// is zero, and returns that function's result unchanged.
func HTTPSProbe(ctx context.Context, cfg ProbeConfig) health.ProbeResult {
	const useTLS = true
	return doHTTPProbe(ctx, cfg, useTLS)
}
// doHTTPProbe runs one HTTP GET health check against cfg.Target and maps the
// outcome onto a layered health.ProbeResult.
//
// The TCP connect (and, when useTLS is set, the TLS handshake) is performed in
// the callback passed to inNetns for cfg.HealthCheckNetns; the HTTP exchange
// then runs over that already-established connection. When cfg.Port is zero
// the port defaults to 443 with TLS and 80 without. Redirects are never
// followed: the first response is the one that is evaluated.
//
// Result codes:
//   - UNKNOWN: cfg.HTTP is nil.
//   - L4TOUT / L4CON: the TCP connect timed out / failed.
//   - L6TOUT / L6RSP: the TLS handshake timed out / failed.
//   - L7TOUT / L7RSP: the request or body read timed out / failed, or the
//     body did not match p.ResponseRegexp.
//   - L7STS: the status code is outside [ResponseCodeMin, ResponseCodeMax].
//   - L7OK: every configured check passed.
func doHTTPProbe(ctx context.Context, cfg ProbeConfig, useTLS bool) health.ProbeResult {
	if cfg.HTTP == nil {
		return health.ProbeResult{OK: false, Layer: health.LayerUnknown, Code: "UNKNOWN", Detail: "missing HTTP params"}
	}
	p := cfg.HTTP

	port := cfg.Port
	if port == 0 {
		if useTLS {
			port = 443
		} else {
			port = 80
		}
	}
	addr := net.JoinHostPort(cfg.Target.String(), strconv.Itoa(int(port)))

	// Always use "http" scheme: TLS (if any) is already applied to conn during
	// the netns dial phase. Using "https" here would cause http.Transport to
	// wrap conn in a second TLS layer, producing "http: server gave HTTP
	// response to HTTPS client".
	target := fmt.Sprintf("http://%s%s", addr, p.Path)

	hostHeader := p.Host
	if hostHeader == "" {
		hostHeader = cfg.Target.String()
	}

	// Dial (and optionally handshake) inside the healthcheck netns.
	// The socket retains its netns after creation, so HTTP can be done outside.
	var (
		conn net.Conn
		// tcpConnected records that the TCP connect succeeded, so that a
		// later error can be attributed to the TLS handshake. conn itself
		// cannot be used for this: it is only assigned on full success.
		tcpConnected bool
	)
	dialErr := inNetns(cfg.HealthCheckNetns, func() error {
		dialer := &net.Dialer{Timeout: cfg.Timeout}
		if cfg.ProbeSrc != nil {
			dialer.LocalAddr = &net.TCPAddr{IP: cfg.ProbeSrc}
		}
		c, err := dialer.DialContext(ctx, "tcp", addr)
		if err != nil {
			return err
		}
		tcpConnected = true
		if !useTLS {
			conn = c
			return nil
		}
		tlsConn := tls.Client(c, tlsConfig(p.ServerName, p.InsecureSkipVerify))
		if err := tlsConn.HandshakeContext(ctx); err != nil {
			c.Close()
			return err
		}
		conn = tlsConn
		return nil
	})
	if dialErr != nil {
		timedOut := isTimeout(dialErr)
		// Distinguish TLS handshake failures (L6) from TCP connect failures
		// (L4). This must be decided before the generic timeout check,
		// otherwise a handshake timeout would be reported as L4TOUT. Any
		// error after a successful TCP connect comes from the handshake;
		// isTLSError is kept as a secondary signal.
		if useTLS && (tcpConnected || isTLSError(dialErr)) {
			code := "L6RSP"
			if timedOut {
				code = "L6TOUT"
			}
			return health.ProbeResult{OK: false, Layer: health.LayerL6, Code: code, Detail: dialErr.Error()}
		}
		if timedOut {
			return health.ProbeResult{OK: false, Layer: health.LayerL4, Code: "L4TOUT", Detail: dialErr.Error()}
		}
		return health.ProbeResult{OK: false, Layer: health.LayerL4, Code: "L4CON", Detail: dialErr.Error()}
	}
	defer conn.Close()

	// The transport hands out the pre-established connection instead of
	// dialing; keep-alives are disabled so it is used for this request only.
	transport := &http.Transport{
		DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
			return conn, nil
		},
		DisableKeepAlives: true,
	}
	client := &http.Client{
		Transport: transport,
		Timeout:   cfg.Timeout,
		CheckRedirect: func(_ *http.Request, _ []*http.Request) error {
			return http.ErrUseLastResponse // never follow redirects
		},
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, target, nil)
	if err != nil {
		return health.ProbeResult{OK: false, Layer: health.LayerL7, Code: "L7RSP", Detail: err.Error()}
	}
	req.Host = hostHeader
	req.Header.Set("User-Agent", "maglev-healthchecker/1.0")

	resp, err := client.Do(req)
	if err != nil {
		if isTimeout(err) {
			return health.ProbeResult{OK: false, Layer: health.LayerL7, Code: "L7TOUT", Detail: err.Error()}
		}
		return health.ProbeResult{OK: false, Layer: health.LayerL7, Code: "L7RSP", Detail: err.Error()}
	}
	defer resp.Body.Close()

	if resp.StatusCode < p.ResponseCodeMin || resp.StatusCode > p.ResponseCodeMax {
		return health.ProbeResult{
			OK:     false,
			Layer:  health.LayerL7,
			Code:   "L7STS",
			Detail: fmt.Sprintf("HTTP %d (want %d-%d)", resp.StatusCode, p.ResponseCodeMin, p.ResponseCodeMax),
		}
	}

	// The body is only read when a regexp is configured.
	if p.ResponseRegexp != nil {
		body, err := io.ReadAll(resp.Body)
		if err != nil {
			// Classify like client.Do errors: only real timeouts are
			// L7TOUT; a truncated or reset body is a bad response.
			code := "L7RSP"
			if isTimeout(err) {
				code = "L7TOUT"
			}
			return health.ProbeResult{OK: false, Layer: health.LayerL7, Code: code, Detail: err.Error()}
		}
		if !p.ResponseRegexp.Match(body) {
			return health.ProbeResult{
				OK:     false,
				Layer:  health.LayerL7,
				Code:   "L7RSP",
				Detail: fmt.Sprintf("body did not match regexp %q", p.ResponseRegexp),
			}
		}
	}

	return health.ProbeResult{OK: true, Layer: health.LayerL7, Code: "L7OK"}
}
// isTLSError reports whether err appears to come from the TLS layer.
// An error qualifies when it is itself a tls.AlertError (the error chain is
// not unwrapped) or when its message contains the substring "tls:".
// A nil error never qualifies.
func isTLSError(err error) bool {
	if err == nil {
		return false
	}
	if _, isAlert := err.(tls.AlertError); isAlert {
		return true
	}
	return strings.Contains(err.Error(), "tls:")
}