Initial revision of healthchecker, inspired by HAProxy

This commit is contained in:
2026-04-10 17:30:44 +02:00
commit b84b3274b1
24 changed files with 4400 additions and 0 deletions

368
internal/config/config.go Normal file
View File

@@ -0,0 +1,368 @@
package config
import (
"fmt"
"net"
"os"
"regexp"
"strconv"
"strings"
"time"
"gopkg.in/yaml.v3"
)
// Frontend is the parsed and validated representation of frontend.yaml.
// convert builds it from the maglev.frontend section of the YAML document.
type Frontend struct {
// ProbeIPv4Src is the parsed probe-ipv4-src value. It is nil when the key is
// omitted; convert rejects a value that is not an IPv4 address.
ProbeIPv4Src net.IP
// ProbeIPv6Src is the parsed probe-ipv6-src value. It is nil when the key is
// omitted; convert rejects a value that is not an IPv6 address.
ProbeIPv6Src net.IP
// HealthCheckNetns is the healthcheck-netns string, copied without validation.
// NOTE(review): presumably the network namespace probes run in — confirm with the prober.
HealthCheckNetns string
// HealthChecker holds the global health checker settings.
HealthChecker HealthCheckerConfig
// VIPs maps each key of the YAML vips mapping to its validated entry.
VIPs map[string]VIP
}
// HealthCheckerConfig holds global health checker settings.
type HealthCheckerConfig struct {
// TransitionHistory comes from healthchecker.transition-history. convert
// substitutes 5 when the YAML value is 0 or omitted and rejects negative values.
// NOTE(review): presumably the number of state transitions kept per backend — confirm with the consumer.
TransitionHistory int
}
// VIP is a single virtual IP entry.
// Values produced by convertVIP satisfy: Address is a valid IP, Protocol and
// Port are either both set or both omitted, and Backends is non-empty with all
// entries in the same address family.
type VIP struct {
// Description is free-form text copied from the YAML entry.
Description string
// Address is the parsed VIP address (IPv4 or IPv6).
Address net.IP
Protocol string // "tcp", "udp", or "" (all traffic)
Port uint16 // 0 means omitted (all ports)
// Backends holds the parsed backend addresses in YAML order. Their family is
// not required to match the family of Address.
Backends []net.IP
// HealthCheck describes how the backends of this VIP are probed.
HealthCheck HealthCheck
}
// HealthCheck describes how to probe backends for a VIP.
type HealthCheck struct {
// Type is one of "icmp", "tcp", "http", or "https"; convertHealthCheck
// rejects every other value.
Type string
HTTP *HTTPParams // non-nil for type http and https
TCP *TCPParams // non-nil for type tcp
// Interval is required and strictly positive.
Interval time.Duration
FastInterval time.Duration // optional; used while health counter is degraded
DownInterval time.Duration // optional; used while fully down
// Timeout is required and strictly positive.
Timeout time.Duration
Rise int // default 2
Fall int // default 3
}
// HTTPParams holds validated parameters for http/https health checks.
// convertHealthCheck fills it from the healthcheck params mapping.
type HTTPParams struct {
// Path is params.path; it is required and never empty after validation.
Path string
Host string // Host header; defaults to backend IP if empty
ResponseCodeMin int // inclusive lower bound; default 200
ResponseCodeMax int // inclusive upper bound; default 200
ResponseRegexp *regexp.Regexp // nil if not configured
// The fallback to Host is already applied by convertHealthCheck at load time.
ServerName string // TLS SNI; falls back to Host if empty (https only)
InsecureSkipVerify bool // skip TLS certificate verification (https only)
}
// TCPParams holds validated parameters for tcp health checks.
// convertHealthCheck copies SSL, ServerName and InsecureSkipVerify from the
// healthcheck params mapping.
type TCPParams struct {
// SSL is params.ssl.
// NOTE(review): presumably enables a TLS handshake on the probe connection — confirm with the prober.
SSL bool
// ServerName is params.server-name, copied as-is (no fallback is applied).
ServerName string
// InsecureSkipVerify is params.insecure-skip-verify.
InsecureSkipVerify bool
// NOTE(review): nothing in this file assigns BannerRegexp and rawParams has no
// matching YAML key, so it is always nil for configs loaded via Load.
BannerRegexp *regexp.Regexp // nil if not configured; matched against the first line sent by the server
}
// ---- raw YAML types --------------------------------------------------------

// rawConfig is the top-level document shape that parse decodes YAML into.
// Only the maglev.frontend subtree is read.
type rawConfig struct {
Maglev struct {
Frontend rawFrontend `yaml:"frontend"`
} `yaml:"maglev"`
}
// rawFrontend is the undecoded form of the maglev.frontend mapping. Addresses
// are kept as strings here; convert parses and validates them into a Frontend.
type rawFrontend struct {
ProbeIPv4Src string `yaml:"probe-ipv4-src"`
ProbeIPv6Src string `yaml:"probe-ipv6-src"`
HealthCheckNetns string `yaml:"healthcheck-netns"`
HealthChecker rawHealthCheckerCfg `yaml:"healthchecker"`
// VIPs is keyed by the VIP name used in the YAML document.
VIPs map[string]rawVIP `yaml:"vips"`
}
// rawHealthCheckerCfg is the undecoded form of the healthchecker mapping.
type rawHealthCheckerCfg struct {
// TransitionHistory is 0 when the key is omitted; convert then applies the default.
TransitionHistory int `yaml:"transition-history"`
}
// rawVIP is the undecoded form of one entry of the vips mapping.
// convertVIP validates it and produces a VIP.
type rawVIP struct {
Description string `yaml:"description"`
Address string `yaml:"address"`
Protocol string `yaml:"protocol"`
// Port is 0 when the key is omitted.
Port uint16 `yaml:"port"`
Backends []string `yaml:"backends"`
HealthCheck rawHealthCheck `yaml:"healthcheck"`
}
// rawHealthCheck is the undecoded form of a healthcheck mapping. Durations are
// kept as strings and parsed with time.ParseDuration by convertHealthCheck.
type rawHealthCheck struct {
Type string `yaml:"type"`
Params rawParams `yaml:"params"`
Interval string `yaml:"interval"`
FastInterval string `yaml:"fast-interval"`
DownInterval string `yaml:"down-interval"`
Timeout string `yaml:"timeout"`
// Rise and Fall are 0 when omitted; convertHealthCheck then applies defaults.
Rise int `yaml:"rise"`
Fall int `yaml:"fall"`
}
// rawParams is the undecoded form of the healthcheck params mapping. It is a
// union of the keys of all check types; convertHealthCheck picks the fields
// relevant to the configured type.
type rawParams struct {
// HTTP / HTTPS
Path string `yaml:"path"`
Host string `yaml:"host"`
// ResponseCode is a single code ("200") or an inclusive range ("200-299").
ResponseCode string `yaml:"response-code"`
ResponseRegexp string `yaml:"response-regexp"`
// ServerName and InsecureSkipVerify are also read for type tcp.
ServerName string `yaml:"server-name"`
InsecureSkipVerify bool `yaml:"insecure-skip-verify"`
// TCP
SSL bool `yaml:"ssl"`
}
// ---- Load ------------------------------------------------------------------

// Load reads the YAML config file at path, then parses and validates it.
// A read failure is wrapped with the offending path; parse and validation
// errors are returned as produced by parse.
func Load(path string) (*Frontend, error) {
	contents, readErr := os.ReadFile(path)
	if readErr == nil {
		return parse(contents)
	}
	return nil, fmt.Errorf("read config %q: %w", path, readErr)
}
// parse decodes data as YAML into a rawConfig and converts its
// maglev.frontend section into a validated Frontend.
func parse(data []byte) (*Frontend, error) {
	doc := rawConfig{}
	err := yaml.Unmarshal(data, &doc)
	if err != nil {
		return nil, fmt.Errorf("parse yaml: %w", err)
	}
	frontend := &doc.Maglev.Frontend
	return convert(frontend)
}
// convert validates the raw frontend section and returns its typed form.
//
// Checks run in this order: probe-ipv4-src, probe-ipv6-src,
// healthchecker.transition-history (0 means "use the default of 5", negative
// values are rejected), then every VIP. The first failure is returned; VIP
// errors are prefixed with the VIP name.
func convert(r *rawFrontend) (*Frontend, error) {
	v4Src, err := parseOptionalIPFamily(r.ProbeIPv4Src, 4, "probe-ipv4-src")
	if err != nil {
		return nil, err
	}
	v6Src, err := parseOptionalIPFamily(r.ProbeIPv6Src, 6, "probe-ipv6-src")
	if err != nil {
		return nil, err
	}

	history := r.HealthChecker.TransitionHistory
	switch {
	case history == 0:
		// Omitted (or explicitly zero): fall back to the default.
		history = 5
	case history < 1:
		return nil, fmt.Errorf("healthchecker.transition-history must be >= 1")
	}

	vips := make(map[string]VIP, len(r.VIPs))
	for vipName, rawEntry := range r.VIPs {
		converted, vipErr := convertVIP(vipName, &rawEntry)
		if vipErr != nil {
			return nil, fmt.Errorf("vip %q: %w", vipName, vipErr)
		}
		vips[vipName] = converted
	}

	return &Frontend{
		ProbeIPv4Src:     v4Src,
		ProbeIPv6Src:     v6Src,
		HealthCheckNetns: r.HealthCheckNetns,
		HealthChecker:    HealthCheckerConfig{TransitionHistory: history},
		VIPs:             vips,
	}, nil
}
// convertVIP validates one raw VIP entry and returns its typed form.
//
// name is not used inside this function; the caller adds it to returned errors.
// Validation order: address, protocol, port/protocol pairing, backends
// (non-empty, each a valid IP, all in the family of the first one), and
// finally the health check. The zero VIP is returned alongside any error.
func convertVIP(name string, r *rawVIP) (VIP, error) {
	addr := net.ParseIP(r.Address)
	if addr == nil {
		return VIP{}, fmt.Errorf("invalid address %q", r.Address)
	}

	if r.Protocol != "" && r.Protocol != "tcp" && r.Protocol != "udp" {
		return VIP{}, fmt.Errorf("protocol must be \"tcp\", \"udp\", or omitted, got %q", r.Protocol)
	}

	// Port and protocol must be given together or not at all.
	hasPort, hasProto := r.Port != 0, r.Protocol != ""
	switch {
	case hasPort && !hasProto:
		return VIP{}, fmt.Errorf("port requires protocol to be set")
	case hasProto && !hasPort:
		return VIP{}, fmt.Errorf("protocol %q requires port to be set (1-65535)", r.Protocol)
	}

	if len(r.Backends) == 0 {
		return VIP{}, fmt.Errorf("backends must not be empty")
	}

	var backends []net.IP
	family := 0
	for idx, text := range r.Backends {
		backend := net.ParseIP(text)
		if backend == nil {
			return VIP{}, fmt.Errorf("backend[%d] %q is not a valid IP", idx, text)
		}
		// The first backend fixes the family every later one must share.
		switch got := ipFamily(backend); {
		case idx == 0:
			family = got
		case got != family:
			return VIP{}, fmt.Errorf("backend[%d] %q has different address family than backend[0]", idx, text)
		}
		backends = append(backends, backend)
	}

	check, err := convertHealthCheck(&r.HealthCheck)
	if err != nil {
		return VIP{}, fmt.Errorf("healthcheck: %w", err)
	}

	return VIP{
		Description: r.Description,
		Address:     addr,
		Protocol:    r.Protocol,
		Port:        r.Port,
		Backends:    backends,
		HealthCheck: check,
	}, nil
}
// convertHealthCheck validates a raw healthcheck mapping and returns its typed
// form. The zero HealthCheck is returned alongside any error.
//
// Type-specific handling:
//   - "icmp": no params are read.
//   - "tcp": ssl, server-name and insecure-skip-verify are copied into TCP.
//     TCPParams.BannerRegexp is left nil because rawParams has no field it
//     could be populated from.
//   - "http"/"https": params.path is required; response-code defaults to 200;
//     server-name falls back to host; response-regexp is compiled if set.
//
// Common fields: interval and timeout are required positive durations,
// fast-interval and down-interval are optional positive durations, fall
// defaults to 3 and rise to 2 (a value of 0 counts as omitted), and negative
// rise/fall values are rejected.
func convertHealthCheck(r *rawHealthCheck) (HealthCheck, error) {
	h := HealthCheck{Type: r.Type}

	switch r.Type {
	case "icmp":
		// no params
	case "tcp":
		h.TCP = &TCPParams{
			SSL:                r.Params.SSL,
			ServerName:         r.Params.ServerName,
			InsecureSkipVerify: r.Params.InsecureSkipVerify,
		}
	case "http", "https":
		if r.Params.Path == "" {
			// Report the configured type so an https check is not described as http.
			return HealthCheck{}, fmt.Errorf("type %s requires params.path", r.Type)
		}
		codeMin, codeMax, err := parseCodeRange(r.Params.ResponseCode, 200)
		if err != nil {
			return HealthCheck{}, err
		}
		hp := &HTTPParams{
			Path:               r.Params.Path,
			Host:               r.Params.Host,
			ResponseCodeMin:    codeMin,
			ResponseCodeMax:    codeMax,
			InsecureSkipVerify: r.Params.InsecureSkipVerify,
		}
		// TLS SNI: server-name takes precedence, falls back to host.
		hp.ServerName = r.Params.ServerName
		if hp.ServerName == "" {
			hp.ServerName = r.Params.Host
		}
		if r.Params.ResponseRegexp != "" {
			re, err := regexp.Compile(r.Params.ResponseRegexp)
			if err != nil {
				return HealthCheck{}, fmt.Errorf("invalid response-regexp %q: %w", r.Params.ResponseRegexp, err)
			}
			hp.ResponseRegexp = re
		}
		h.HTTP = hp
	default:
		return HealthCheck{}, fmt.Errorf("type must be \"icmp\", \"tcp\", \"http\", or \"https\", got %q", r.Type)
	}

	// positive parses value as a duration and requires it to be > 0. Both a
	// parse failure and a non-positive value produce the same message.
	positive := func(field, value string) (time.Duration, error) {
		d, err := time.ParseDuration(value)
		if err != nil || d <= 0 {
			return 0, fmt.Errorf("%s %q must be a positive duration", field, value)
		}
		return d, nil
	}

	var err error
	if r.Interval == "" {
		return HealthCheck{}, fmt.Errorf("interval is required")
	}
	if h.Interval, err = positive("interval", r.Interval); err != nil {
		return HealthCheck{}, err
	}
	if r.FastInterval != "" {
		if h.FastInterval, err = positive("fast-interval", r.FastInterval); err != nil {
			return HealthCheck{}, err
		}
	}
	if r.DownInterval != "" {
		if h.DownInterval, err = positive("down-interval", r.DownInterval); err != nil {
			return HealthCheck{}, err
		}
	}
	if r.Timeout == "" {
		return HealthCheck{}, fmt.Errorf("timeout is required")
	}
	if h.Timeout, err = positive("timeout", r.Timeout); err != nil {
		return HealthCheck{}, err
	}

	// A zero rise/fall is indistinguishable from an omitted key, so it selects
	// the default rather than failing validation.
	h.Fall = r.Fall
	if h.Fall == 0 {
		h.Fall = 3
	}
	if h.Fall < 1 {
		return HealthCheck{}, fmt.Errorf("fall must be >= 1")
	}
	h.Rise = r.Rise
	if h.Rise == 0 {
		h.Rise = 2
	}
	if h.Rise < 1 {
		return HealthCheck{}, fmt.Errorf("rise must be >= 1")
	}
	return h, nil
}
// ---- helpers ---------------------------------------------------------------

// parseOptionalIPFamily parses s as an IP address that must belong to the
// given family (4 or 6). field names the config key in error messages.
// An empty s is not an error and yields a nil IP.
func parseOptionalIPFamily(s string, family int, field string) (net.IP, error) {
	if len(s) == 0 {
		return nil, nil
	}
	parsed := net.ParseIP(s)
	switch {
	case parsed == nil:
		return nil, fmt.Errorf("%s %q is not a valid IP address", field, s)
	case ipFamily(parsed) != family:
		return nil, fmt.Errorf("%s %q must be an IPv%d address", field, s, family)
	}
	return parsed, nil
}
// ipFamily reports the address family of ip: 4 when ip has a 4-byte IPv4
// form (ip.To4 is non-nil), and 6 for every other value.
func ipFamily(ip net.IP) int {
	if v4 := ip.To4(); v4 == nil {
		return 6
	}
	return 4
}
// parseCodeRange parses a response-code value, which may be a single status
// code ("200") or an inclusive range ("200-299"). It returns (min, max, err);
// for a single code min == max.
//
// An empty s yields (defaultCode, defaultCode) without further checks.
// Whitespace around each bound is ignored, so "200 - 299" is accepted.
// Every bound must be an HTTP status code in 100-599, and a range must satisfy
// min <= max: a negative code or an inverted range could never match a real
// response, so it is reported as a configuration error instead of silently
// producing a check that always fails.
func parseCodeRange(s string, defaultCode int) (min, max int, err error) {
	const (
		lowestStatus  = 100
		highestStatus = 599
	)
	if s == "" {
		return defaultCode, defaultCode, nil
	}

	// bound converts one side of the value and enforces the status-code range.
	bound := func(text string) (int, error) {
		code, convErr := strconv.Atoi(strings.TrimSpace(text))
		if convErr != nil {
			return 0, fmt.Errorf("%q is not an integer", text)
		}
		if code < lowestStatus || code > highestStatus {
			return 0, fmt.Errorf("status code %d is outside %d-%d", code, lowestStatus, highestStatus)
		}
		return code, nil
	}

	// idx > 0 (not >= 0) so that a leading '-' is handled as a (rejected)
	// negative single code rather than as a range with an empty lower bound.
	if idx := strings.IndexByte(s, '-'); idx > 0 {
		lo, loErr := bound(s[:idx])
		if loErr != nil {
			return 0, 0, fmt.Errorf("invalid response-code range %q: %w", s, loErr)
		}
		hi, hiErr := bound(s[idx+1:])
		if hiErr != nil {
			return 0, 0, fmt.Errorf("invalid response-code range %q: %w", s, hiErr)
		}
		if lo > hi {
			return 0, 0, fmt.Errorf("invalid response-code range %q: lower bound exceeds upper bound", s)
		}
		return lo, hi, nil
	}

	code, codeErr := bound(s)
	if codeErr != nil {
		return 0, 0, fmt.Errorf("invalid response-code %q: %w", s, codeErr)
	}
	return code, code, nil
}

View File

@@ -0,0 +1,408 @@
package config
import (
	"strings"
	"testing"
	"time"
)
// validConfig is the YAML fixture used by TestValidConfig: both probe source
// addresses, a netns, an explicit transition-history, and two VIPs (web4 with
// an http check, web6 with an icmp check).
const validConfig = `
maglev:
frontend:
probe-ipv4-src: 10.0.0.1
probe-ipv6-src: 2001:db8:1::1
healthcheck-netns: dataplane
healthchecker:
transition-history: 5
vips:
web4:
description: "IPv4 VIP"
address: 192.0.2.1
protocol: tcp
port: 80
backends: [2001:db8:2::1, 2001:db8:2::2]
healthcheck:
type: http
params:
path: /healthz
host: example.com
response-code: "200"
interval: 2s
timeout: 3s
rise: 2
fall: 3
web6:
description: "IPv6 VIP"
address: 2001:db8::1
protocol: tcp
port: 443
backends: [2001:db8:2::1, 2001:db8:2::2]
healthcheck:
type: icmp
interval: 1s
timeout: 3s
fall: 5
`
// TestValidConfig parses the validConfig fixture and checks that the
// top-level settings and the per-VIP health check fields carry the values
// written in the YAML.
func TestValidConfig(t *testing.T) {
	cfg, err := parse([]byte(validConfig))
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Top-level frontend settings.
	if got := cfg.ProbeIPv4Src.String(); got != "10.0.0.1" {
		t.Errorf("probe-ipv4-src: got %s, want 10.0.0.1", cfg.ProbeIPv4Src)
	}
	if got := cfg.ProbeIPv6Src.String(); got != "2001:db8:1::1" {
		t.Errorf("probe-ipv6-src: got %s, want 2001:db8:1::1", cfg.ProbeIPv6Src)
	}
	if cfg.HealthCheckNetns != "dataplane" {
		t.Errorf("healthcheck-netns: got %q, want %q", cfg.HealthCheckNetns, "dataplane")
	}
	if cfg.HealthChecker.TransitionHistory != 5 {
		t.Errorf("transition-history: got %d, want 5", cfg.HealthChecker.TransitionHistory)
	}
	if n := len(cfg.VIPs); n != 2 {
		t.Fatalf("vips: got %d, want 2", n)
	}

	// web4: http health check with explicit params.
	hc4 := cfg.VIPs["web4"].HealthCheck
	if hc4.Type != "http" {
		t.Errorf("web4 healthcheck type: got %q, want http", hc4.Type)
	}
	if hc4.Fall != 3 {
		t.Errorf("web4 fall: got %d, want 3", hc4.Fall)
	}
	if hc4.Rise != 2 {
		t.Errorf("web4 rise: got %d, want 2", hc4.Rise)
	}
	httpParams := hc4.HTTP
	if httpParams == nil {
		t.Fatal("web4 HTTP params should not be nil")
	}
	if httpParams.Path != "/healthz" {
		t.Errorf("web4 params.path: got %q, want /healthz", httpParams.Path)
	}
	if httpParams.Host != "example.com" {
		t.Errorf("web4 params.host: got %q, want example.com", httpParams.Host)
	}
	if !(httpParams.ResponseCodeMin == 200 && httpParams.ResponseCodeMax == 200) {
		t.Errorf("web4 response-code: got %d-%d, want 200-200",
			httpParams.ResponseCodeMin, httpParams.ResponseCodeMax)
	}

	// web6: icmp health check with an overridden fall.
	hc6 := cfg.VIPs["web6"].HealthCheck
	if hc6.Fall != 5 {
		t.Errorf("web6 fall: got %d, want 5", hc6.Fall)
	}
}
// TestDefaults parses a config that omits every optional key and checks the
// defaults applied by the loader: empty netns, transition-history 5, rise 2,
// fall 3, and a nil IPv4 probe source.
func TestDefaults(t *testing.T) {
	const minimal = `
maglev:
frontend:
vips:
v:
address: 192.0.2.1
backends: [10.0.0.2]
healthcheck:
type: icmp
interval: 1s
timeout: 2s
`
	front, err := parse([]byte(minimal))
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if netns := front.HealthCheckNetns; netns != "" {
		t.Errorf("default healthcheck-netns: got %q, want empty (default netns)", netns)
	}
	if history := front.HealthChecker.TransitionHistory; history != 5 {
		t.Errorf("default transition-history: got %d, want 5", history)
	}

	check := front.VIPs["v"].HealthCheck
	if check.Rise != 2 {
		t.Errorf("default rise: got %d, want 2", check.Rise)
	}
	if check.Fall != 3 {
		t.Errorf("default fall: got %d, want 3", check.Fall)
	}

	if src := front.ProbeIPv4Src; src != nil {
		t.Errorf("probe-ipv4-src should be nil when omitted, got %s", src)
	}
}
// TestOptionalIntervals checks that interval, fast-interval and down-interval
// are parsed into the corresponding time.Duration fields.
func TestOptionalIntervals(t *testing.T) {
	const withIntervals = `
maglev:
frontend:
vips:
v:
address: 192.0.2.1
backends: [10.0.0.2]
healthcheck:
type: icmp
interval: 2s
fast-interval: 500ms
down-interval: 30s
timeout: 1s
`
	front, err := parse([]byte(withIntervals))
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	check := front.VIPs["v"].HealthCheck
	if got := check.Interval; got != 2*time.Second {
		t.Errorf("interval: got %v, want 2s", got)
	}
	if got := check.FastInterval; got != 500*time.Millisecond {
		t.Errorf("fast-interval: got %v, want 500ms", got)
	}
	if got := check.DownInterval; got != 30*time.Second {
		t.Errorf("down-interval: got %v, want 30s", got)
	}
}
// TestTCPType checks that a healthcheck of type tcp without params is
// accepted and keeps its type.
func TestTCPType(t *testing.T) {
	const tcpConfig = `
maglev:
frontend:
vips:
v:
address: 192.0.2.1
protocol: tcp
port: 80
backends: [10.0.0.2]
healthcheck:
type: tcp
interval: 1s
timeout: 2s
`
	front, err := parse([]byte(tcpConfig))
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got := front.VIPs["v"].HealthCheck.Type; got != "tcp" {
		t.Errorf("type: got %q, want tcp", got)
	}
}
// TestValidationErrors is a table-driven test of config validation. Each case
// supplies a full YAML document and the substring the resulting error must
// contain. An empty errSub means the document must parse without error.
func TestValidationErrors(t *testing.T) {
	tests := []struct {
		name   string
		yaml   string
		errSub string
	}{
		{
			name: "wrong family probe-ipv4-src",
			yaml: `
maglev:
frontend:
probe-ipv4-src: 2001:db8::1
vips:
v:
address: 192.0.2.1
backends: [10.0.0.2]
healthcheck:
type: icmp
interval: 1s
timeout: 2s
`,
			errSub: "probe-ipv4-src",
		},
		{
			name: "mixed backend families",
			yaml: `
maglev:
frontend:
vips:
v:
address: 192.0.2.1
backends: [10.0.0.2, 2001:db8::1]
healthcheck:
type: icmp
interval: 1s
timeout: 2s
`,
			errSub: "address family",
		},
		{
			name:   "port without protocol",
			yaml:   validPortWithoutProtocol,
			errSub: "port requires protocol",
		},
		{
			name: "protocol without port",
			yaml: `
maglev:
frontend:
vips:
v:
address: 192.0.2.1
protocol: tcp
backends: [10.0.0.2]
healthcheck:
type: icmp
interval: 1s
timeout: 2s
`,
			errSub: "requires port",
		},
		{
			name: "invalid healthcheck type",
			yaml: `
maglev:
frontend:
vips:
v:
address: 192.0.2.1
backends: [10.0.0.2]
healthcheck:
type: dns
interval: 1s
timeout: 2s
`,
			errSub: "type must be",
		},
		{
			name: "http missing path",
			yaml: `
maglev:
frontend:
vips:
v:
address: 192.0.2.1
backends: [10.0.0.2]
healthcheck:
type: http
interval: 1s
timeout: 2s
`,
			errSub: "params.path",
		},
		{
			name: "negative interval",
			yaml: `
maglev:
frontend:
vips:
v:
address: 192.0.2.1
backends: [10.0.0.2]
healthcheck:
type: icmp
interval: -1s
timeout: 2s
`,
			errSub: "positive duration",
		},
		{
			name: "invalid fast-interval",
			yaml: `
maglev:
frontend:
vips:
v:
address: 192.0.2.1
backends: [10.0.0.2]
healthcheck:
type: icmp
interval: 1s
fast-interval: -1s
timeout: 2s
`,
			errSub: "positive duration",
		},
		{
			name: "fall zero becomes default",
			yaml: `
maglev:
frontend:
vips:
v:
address: 192.0.2.1
backends: [10.0.0.2]
healthcheck:
type: icmp
interval: 1s
timeout: 2s
fall: 0
`,
			// fall: 0 is treated as omitted → default 3; no error
			errSub: "",
		},
		{
			name: "empty backends",
			yaml: `
maglev:
frontend:
vips:
v:
address: 192.0.2.1
backends: []
healthcheck:
type: icmp
interval: 1s
timeout: 2s
`,
			errSub: "backends must not be empty",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			_, err := parse([]byte(tt.yaml))
			if tt.errSub == "" {
				// Success case: any error is a failure.
				if err != nil {
					t.Fatalf("expected no error, got: %v", err)
				}
				return
			}
			if err == nil {
				t.Fatalf("expected error containing %q, got nil", tt.errSub)
			}
			if !contains(err.Error(), tt.errSub) {
				t.Errorf("error %q does not contain %q", err.Error(), tt.errSub)
			}
		})
	}
}
// validPortWithoutProtocol is the fixture for the "port without protocol" case
// of TestValidationErrors: a VIP that sets port but no protocol.
const validPortWithoutProtocol = `
maglev:
frontend:
vips:
v:
address: 192.0.2.1
port: 80
backends: [10.0.0.2]
healthcheck:
type: icmp
interval: 1s
timeout: 2s
`
// contains reports whether sub occurs within s. An empty sub is contained in
// every string, including the empty string.
func contains(s, sub string) bool {
	return strings.Contains(s, sub)
}