Add WatchEvents, enable/disable/weight RPCs, and config check

gRPC / proto
- Rename WatchBackendEvents → WatchEvents; return a stream of Event
  oneof (LogEvent, BackendEvent, FrontendEvent) with optional filter
  flags (log, log_level, backend, frontend)
- Add EnableBackend, DisableBackend, SetFrontendPoolBackendWeight RPCs
- Rename PauseResumeRequest → BackendRequest
- Add CheckConfig RPC returning ok/parse_error/semantic_error

maglevd
- Route slog through a LogBroadcaster (slog.Handler) so WatchEvents
  subscribers can receive structured log records independently of the
  daemon's own --log-level
- Add --reflection flag (default true) to toggle gRPC server reflection
- Add --check flag: validates config file and exits 0/1/2
- SIGHUP: use config.Check before applying reload; log parse vs semantic
  error separately; refuse reload on any error
- Rename default config path /etc/maglev → /etc/vpp-maglev

maglevc
- Add 'watch events [num <n>] [log [level <level>]] [backend] [frontend]'
  command; prints compact protojson, stops on any keypress or Ctrl-C;
  uses cbreak mode (not raw) so output post-processing is preserved
- Add 'set backend <name> enable|disable'
- Add 'set frontend <name> pool <pool> backend <name> weight <0-100>'
- Add 'config check' command

Debian packaging
- Rename service unit to vpp-maglevd.service
- Rename conffiles to /etc/default/vpp-maglev and /etc/vpp-maglev/
- Create maglevd system user/group in postinst; add to vpp group if present
- Add postrm; add adduser to Depends
This commit is contained in:
2026-04-11 16:42:11 +02:00
parent d612086a5f
commit 58391f5463
26 changed files with 1969 additions and 400 deletions

View File

@@ -4,6 +4,7 @@ package checker
import (
"context"
"fmt"
"log/slog"
"net"
"sort"
@@ -42,6 +43,7 @@ type worker struct {
// Each backend is probed exactly once, regardless of how many frontends
// reference it.
type Checker struct {
runCtx context.Context // set in Run; used by EnableBackend to start new goroutines
cfg *config.Config
mu sync.RWMutex
workers map[string]*worker // keyed by backend name
@@ -67,6 +69,7 @@ func (c *Checker) Run(ctx context.Context) error {
go c.fanOut(ctx)
c.mu.Lock()
c.runCtx = ctx // safe: held under mu before any EnableBackend call can read it
names := activeBackendNames(c.cfg)
maxHistory := c.cfg.HealthChecker.TransitionHistory
for i, name := range names {
@@ -167,6 +170,36 @@ func (c *Checker) GetFrontend(name string) (config.Frontend, bool) {
return v, ok
}
// SetFrontendPoolBackendWeight sets the weight of one backend inside a named
// pool of a frontend and persists the change into the live config. It returns
// the updated frontend, or a descriptive error when the weight is outside
// [0, 100] or the frontend, pool, or backend cannot be found.
func (c *Checker) SetFrontendPoolBackendWeight(frontendName, poolName, backendName string, weight int) (config.Frontend, error) {
	// Validate before taking the lock; invalid weights never touch state.
	if weight < 0 || weight > 100 {
		return config.Frontend{}, fmt.Errorf("weight %d out of range [0, 100]", weight)
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	fe, found := c.cfg.Frontends[frontendName]
	if !found {
		return config.Frontend{}, fmt.Errorf("frontend %q not found", frontendName)
	}
	for idx := range fe.Pools {
		if fe.Pools[idx].Name != poolName {
			continue
		}
		entry, present := fe.Pools[idx].Backends[backendName]
		if !present {
			return config.Frontend{}, fmt.Errorf("backend %q not found in pool %q", backendName, poolName)
		}
		entry.Weight = weight
		// Write the updated entry back and store the frontend copy so the
		// change is visible via GetFrontend.
		fe.Pools[idx].Backends[backendName] = entry
		c.cfg.Frontends[frontendName] = fe
		slog.Info("frontend-pool-weight", "frontend", frontendName, "pool", poolName, "backend", backendName, "weight", weight)
		return fe, nil
	}
	return config.Frontend{}, fmt.Errorf("pool %q not found in frontend %q", poolName, frontendName)
}
// ListHealthChecks returns the names of all configured health checks, sorted.
func (c *Checker) ListHealthChecks() []string {
c.mu.RLock()
@@ -278,6 +311,59 @@ func (c *Checker) ResumeBackend(name string) (BackendSnapshot, bool) {
return BackendSnapshot{Health: w.backend, Config: w.entry}, true
}
// DisableBackend stops health checking for the named backend and takes it out
// of active rotation. Its worker entry stays in the map, so the backend remains
// visible via GetBackend and can be brought back with EnableBackend. The second
// return value is false when no such backend exists.
func (c *Checker) DisableBackend(name string) (BackendSnapshot, bool) {
	c.mu.Lock()
	defer c.mu.Unlock()
	w, exists := c.workers[name]
	if !exists {
		return BackendSnapshot{}, false
	}
	// Already disabled: report the current state without touching anything.
	if !w.entry.Enabled {
		return BackendSnapshot{Health: w.backend, Config: w.entry}, true
	}
	// Mark the backend removed and notify subscribers before cancelling the
	// probe goroutine.
	t := w.backend.Remove(c.cfg.HealthChecker.TransitionHistory)
	slog.Info("backend-disable", "backend", name)
	c.emitForBackend(name, w.backend.Address, t, c.cfg.Frontends)
	w.cancel()
	w.entry.Enabled = false
	// Keep the config in sync so a reload/serialization reflects the change.
	if be, present := c.cfg.Backends[name]; present {
		be.Enabled = false
		c.cfg.Backends[name] = be
	}
	return BackendSnapshot{Health: w.backend, Config: w.entry}, true
}
// EnableBackend re-enables a previously disabled backend by launching a fresh
// probe goroutine; the backend re-enters StateUnknown. The second return value
// is false when no such backend exists. Calling it on an already-enabled
// backend is a no-op that returns the current snapshot.
func (c *Checker) EnableBackend(name string) (BackendSnapshot, bool) {
	c.mu.Lock()
	defer c.mu.Unlock()
	w, exists := c.workers[name]
	if !exists {
		return BackendSnapshot{}, false
	}
	// Already enabled: nothing to do.
	if w.entry.Enabled {
		return BackendSnapshot{Health: w.backend, Config: w.entry}, true
	}
	entry := w.entry
	entry.Enabled = true
	// Keep the config in sync so a reload/serialization reflects the change.
	if be, present := c.cfg.Backends[name]; present {
		be.Enabled = true
		c.cfg.Backends[name] = be
	}
	hc := c.cfg.HealthChecks[entry.HealthCheck]
	slog.Info("backend-enable", "backend", name)
	// runCtx was stored under mu in Run, so the new worker inherits the
	// daemon's lifetime context.
	c.startWorker(c.runCtx, name, entry, hc, 0, 1, c.cfg.HealthChecker.TransitionHistory)
	// startWorker replaced the map entry; emit the fresh worker's initial
	// transition (assumed present at index 0 — created by startWorker).
	nw := c.workers[name]
	c.emitForBackend(name, nw.backend.Address, nw.backend.Transitions[0], c.cfg.Frontends)
	return BackendSnapshot{Health: nw.backend, Config: nw.entry}, true
}
// ---- internal --------------------------------------------------------------
// startWorker creates a Backend and launches a probe goroutine.

View File

@@ -246,6 +246,98 @@ func TestSubscribe(t *testing.T) {
}
}
func TestSetFrontendPoolBackendWeight(t *testing.T) {
	cfg := makeTestConfig(time.Hour, 3, 2)
	c := New(cfg)
	// A valid update must succeed and report the new weight.
	fe, err := c.SetFrontendPoolBackendWeight("web", "primary", "be0", 42)
	if err != nil {
		t.Fatalf("SetFrontendPoolBackendWeight: %v", err)
	}
	if w := fe.Pools[0].Backends["be0"].Weight; w != 42 {
		t.Errorf("weight: got %d, want 42", w)
	}
	// The change must be persisted in the live config.
	got, _ := c.GetFrontend("web")
	if w := got.Pools[0].Backends["be0"].Weight; w != 42 {
		t.Errorf("config weight: got %d, want 42", w)
	}
	// Every invalid lookup or value must be rejected with an error.
	rejected := []struct {
		desc     string
		frontend string
		pool     string
		backend  string
		weight   int
	}{
		{"weight 101", "web", "primary", "be0", 101},
		{"unknown frontend", "nope", "primary", "be0", 50},
		{"unknown pool", "web", "nope", "be0", 50},
		{"unknown backend in pool", "web", "primary", "nope", 50},
	}
	for _, tc := range rejected {
		if _, err := c.SetFrontendPoolBackendWeight(tc.frontend, tc.pool, tc.backend, tc.weight); err == nil {
			t.Errorf("expected error for %s", tc.desc)
		}
	}
}
// TestEnableDisable exercises the DisableBackend/EnableBackend round trip:
// disable must move the backend to StateRemoved while keeping it visible via
// GetBackend; enable must restart it in StateUnknown with Enabled set.
func TestEnableDisable(t *testing.T) {
	cfg := makeTestConfig(time.Hour, 3, 2)
	c := New(cfg)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	go c.fanOut(ctx)
	// Seed a worker as EnableBackend/DisableBackend require one in c.workers.
	// Only the cancel func is needed (DisableBackend calls it); the derived
	// context itself is unused, so discard it at the call site instead of
	// keeping a dead variable alive with a blank assignment.
	_, wCancel := context.WithCancel(ctx)
	c.mu.Lock()
	c.runCtx = ctx
	c.workers["be0"] = &worker{
		backend: health.New("be0", net.ParseIP("10.0.0.2"), 2, 3),
		hc:      cfg.HealthChecks["icmp"],
		entry:   cfg.Backends["be0"],
		cancel:  wCancel,
	}
	c.mu.Unlock()
	b, ok := c.DisableBackend("be0")
	if !ok {
		t.Fatal("DisableBackend: not found")
	}
	if b.Health.State != health.StateRemoved {
		t.Errorf("after disable: state=%s, want removed", b.Health.State)
	}
	if b.Config.Enabled {
		t.Error("after disable: Enabled should be false")
	}
	// Backend should still be visible after disable.
	snap, ok := c.GetBackend("be0")
	if !ok {
		t.Fatal("GetBackend after disable: not found")
	}
	if snap.Config.Enabled {
		t.Error("GetBackend after disable: Enabled should be false")
	}
	b, ok = c.EnableBackend("be0")
	if !ok {
		t.Fatal("EnableBackend: not found")
	}
	if b.Health.State != health.StateUnknown {
		t.Errorf("after enable: state=%s, want unknown", b.Health.State)
	}
	if !b.Config.Enabled {
		t.Error("after enable: Enabled should be true")
	}
}
func TestPauseResume(t *testing.T) {
cfg := makeTestConfig(time.Hour, 3, 2)
c := New(cfg)