Add WatchEvents, enable/disable/weight RPCs, and config check

gRPC / proto
- Rename WatchBackendEvents → WatchEvents; return a stream of Event
  oneof (LogEvent, BackendEvent, FrontendEvent) with optional filter
  flags (log, log_level, backend, frontend)
- Add EnableBackend, DisableBackend, SetFrontendPoolBackendWeight RPCs
- Rename PauseResumeRequest → BackendRequest
- Add CheckConfig RPC returning ok/parse_error/semantic_error

maglevd
- Route slog through a LogBroadcaster (slog.Handler) so WatchEvents
  subscribers can receive structured log records independently of the
  daemon's own --log-level
- Add --reflection flag (default true) to toggle gRPC server reflection
- Add --check flag: validates config file and exits 0/1/2
- SIGHUP: use config.Check before applying reload; log parse vs semantic
  error separately; refuse reload on any error
- Rename default config path /etc/maglev → /etc/vpp-maglev

maglevc
- Add 'watch events [num <n>] [log [level <level>]] [backend] [frontend]'
  command; prints compact protojson, stops on any keypress or Ctrl-C;
  uses cbreak mode (not raw) so output post-processing is preserved
- Add 'set backend <name> enable|disable'
- Add 'set frontend <name> pool <pool> backend <name> weight <0-100>'
- Add 'config check' command

Debian packaging
- Rename service unit to vpp-maglevd.service
- Rename conffiles to /etc/default/vpp-maglev and /etc/vpp-maglev/
- Create maglevd system user/group in postinst; add to vpp group if present
- Add postrm; add adduser to Depends
This commit is contained in:
2026-04-11 16:42:11 +02:00
parent d612086a5f
commit 58391f5463
26 changed files with 1969 additions and 400 deletions

View File

@@ -4,6 +4,7 @@ package checker
import (
"context"
"fmt"
"log/slog"
"net"
"sort"
@@ -42,6 +43,7 @@ type worker struct {
// Each backend is probed exactly once, regardless of how many frontends
// reference it.
type Checker struct {
runCtx context.Context // set in Run; used by EnableBackend to start new goroutines
cfg *config.Config
mu sync.RWMutex
workers map[string]*worker // keyed by backend name
@@ -67,6 +69,7 @@ func (c *Checker) Run(ctx context.Context) error {
go c.fanOut(ctx)
c.mu.Lock()
c.runCtx = ctx // safe: held under mu before any EnableBackend call can read it
names := activeBackendNames(c.cfg)
maxHistory := c.cfg.HealthChecker.TransitionHistory
for i, name := range names {
@@ -167,6 +170,36 @@ func (c *Checker) GetFrontend(name string) (config.Frontend, bool) {
return v, ok
}
// SetFrontendPoolBackendWeight sets the weight of one backend inside a named
// pool of a frontend and persists the change into the live config. It returns
// the updated frontend, or a descriptive error when the weight is outside
// [0, 100] or the frontend, pool, or backend cannot be found.
func (c *Checker) SetFrontendPoolBackendWeight(frontendName, poolName, backendName string, weight int) (config.Frontend, error) {
	// Validate before taking the lock; invalid weights never touch state.
	if weight < 0 || weight > 100 {
		return config.Frontend{}, fmt.Errorf("weight %d out of range [0, 100]", weight)
	}
	c.mu.Lock()
	defer c.mu.Unlock()
	fe, found := c.cfg.Frontends[frontendName]
	if !found {
		return config.Frontend{}, fmt.Errorf("frontend %q not found", frontendName)
	}
	for idx := range fe.Pools {
		if fe.Pools[idx].Name != poolName {
			continue
		}
		entry, present := fe.Pools[idx].Backends[backendName]
		if !present {
			return config.Frontend{}, fmt.Errorf("backend %q not found in pool %q", backendName, poolName)
		}
		entry.Weight = weight
		// Write the updated entry back and store the frontend copy so the
		// change is visible via GetFrontend.
		fe.Pools[idx].Backends[backendName] = entry
		c.cfg.Frontends[frontendName] = fe
		slog.Info("frontend-pool-weight", "frontend", frontendName, "pool", poolName, "backend", backendName, "weight", weight)
		return fe, nil
	}
	return config.Frontend{}, fmt.Errorf("pool %q not found in frontend %q", poolName, frontendName)
}
// ListHealthChecks returns the names of all configured health checks, sorted.
func (c *Checker) ListHealthChecks() []string {
c.mu.RLock()
@@ -278,6 +311,59 @@ func (c *Checker) ResumeBackend(name string) (BackendSnapshot, bool) {
return BackendSnapshot{Health: w.backend, Config: w.entry}, true
}
// DisableBackend stops health checking for the named backend and takes it out
// of active rotation. Its worker entry stays in the map, so the backend remains
// visible via GetBackend and can be brought back with EnableBackend. The second
// return value is false when no such backend exists.
func (c *Checker) DisableBackend(name string) (BackendSnapshot, bool) {
	c.mu.Lock()
	defer c.mu.Unlock()
	w, exists := c.workers[name]
	if !exists {
		return BackendSnapshot{}, false
	}
	// Already disabled: report the current state without touching anything.
	if !w.entry.Enabled {
		return BackendSnapshot{Health: w.backend, Config: w.entry}, true
	}
	// Mark the backend removed and notify subscribers before cancelling the
	// probe goroutine.
	t := w.backend.Remove(c.cfg.HealthChecker.TransitionHistory)
	slog.Info("backend-disable", "backend", name)
	c.emitForBackend(name, w.backend.Address, t, c.cfg.Frontends)
	w.cancel()
	w.entry.Enabled = false
	// Keep the config in sync so a reload/serialization reflects the change.
	if be, present := c.cfg.Backends[name]; present {
		be.Enabled = false
		c.cfg.Backends[name] = be
	}
	return BackendSnapshot{Health: w.backend, Config: w.entry}, true
}
// EnableBackend re-enables a previously disabled backend by launching a fresh
// probe goroutine; the backend re-enters StateUnknown. The second return value
// is false when no such backend exists. Calling it on an already-enabled
// backend is a no-op that returns the current snapshot.
func (c *Checker) EnableBackend(name string) (BackendSnapshot, bool) {
	c.mu.Lock()
	defer c.mu.Unlock()
	w, exists := c.workers[name]
	if !exists {
		return BackendSnapshot{}, false
	}
	// Already enabled: nothing to do.
	if w.entry.Enabled {
		return BackendSnapshot{Health: w.backend, Config: w.entry}, true
	}
	entry := w.entry
	entry.Enabled = true
	// Keep the config in sync so a reload/serialization reflects the change.
	if be, present := c.cfg.Backends[name]; present {
		be.Enabled = true
		c.cfg.Backends[name] = be
	}
	hc := c.cfg.HealthChecks[entry.HealthCheck]
	slog.Info("backend-enable", "backend", name)
	// runCtx was stored under mu in Run, so the new worker inherits the
	// daemon's lifetime context.
	c.startWorker(c.runCtx, name, entry, hc, 0, 1, c.cfg.HealthChecker.TransitionHistory)
	// startWorker replaced the map entry; emit the fresh worker's initial
	// transition (assumed present at index 0 — created by startWorker).
	nw := c.workers[name]
	c.emitForBackend(name, nw.backend.Address, nw.backend.Transitions[0], c.cfg.Frontends)
	return BackendSnapshot{Health: nw.backend, Config: nw.entry}, true
}
// ---- internal --------------------------------------------------------------
// startWorker creates a Backend and launches a probe goroutine.

View File

@@ -246,6 +246,98 @@ func TestSubscribe(t *testing.T) {
}
}
func TestSetFrontendPoolBackendWeight(t *testing.T) {
	cfg := makeTestConfig(time.Hour, 3, 2)
	c := New(cfg)
	// A valid update must succeed and report the new weight.
	fe, err := c.SetFrontendPoolBackendWeight("web", "primary", "be0", 42)
	if err != nil {
		t.Fatalf("SetFrontendPoolBackendWeight: %v", err)
	}
	if w := fe.Pools[0].Backends["be0"].Weight; w != 42 {
		t.Errorf("weight: got %d, want 42", w)
	}
	// The change must be persisted in the live config.
	got, _ := c.GetFrontend("web")
	if w := got.Pools[0].Backends["be0"].Weight; w != 42 {
		t.Errorf("config weight: got %d, want 42", w)
	}
	// Every invalid lookup or value must be rejected with an error.
	rejected := []struct {
		desc     string
		frontend string
		pool     string
		backend  string
		weight   int
	}{
		{"weight 101", "web", "primary", "be0", 101},
		{"unknown frontend", "nope", "primary", "be0", 50},
		{"unknown pool", "web", "nope", "be0", 50},
		{"unknown backend in pool", "web", "primary", "nope", 50},
	}
	for _, tc := range rejected {
		if _, err := c.SetFrontendPoolBackendWeight(tc.frontend, tc.pool, tc.backend, tc.weight); err == nil {
			t.Errorf("expected error for %s", tc.desc)
		}
	}
}
// TestEnableDisable exercises the DisableBackend/EnableBackend round trip:
// disable must move the backend to StateRemoved while keeping it visible via
// GetBackend; enable must restart it in StateUnknown with Enabled set.
func TestEnableDisable(t *testing.T) {
	cfg := makeTestConfig(time.Hour, 3, 2)
	c := New(cfg)
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	go c.fanOut(ctx)
	// Seed a worker as EnableBackend/DisableBackend require one in c.workers.
	// Only the cancel func is needed (DisableBackend calls it); the derived
	// context itself is unused, so discard it at the call site instead of
	// keeping a dead variable alive with a blank assignment.
	_, wCancel := context.WithCancel(ctx)
	c.mu.Lock()
	c.runCtx = ctx
	c.workers["be0"] = &worker{
		backend: health.New("be0", net.ParseIP("10.0.0.2"), 2, 3),
		hc:      cfg.HealthChecks["icmp"],
		entry:   cfg.Backends["be0"],
		cancel:  wCancel,
	}
	c.mu.Unlock()
	b, ok := c.DisableBackend("be0")
	if !ok {
		t.Fatal("DisableBackend: not found")
	}
	if b.Health.State != health.StateRemoved {
		t.Errorf("after disable: state=%s, want removed", b.Health.State)
	}
	if b.Config.Enabled {
		t.Error("after disable: Enabled should be false")
	}
	// Backend should still be visible after disable.
	snap, ok := c.GetBackend("be0")
	if !ok {
		t.Fatal("GetBackend after disable: not found")
	}
	if snap.Config.Enabled {
		t.Error("GetBackend after disable: Enabled should be false")
	}
	b, ok = c.EnableBackend("be0")
	if !ok {
		t.Fatal("EnableBackend: not found")
	}
	if b.Health.State != health.StateUnknown {
		t.Errorf("after enable: state=%s, want unknown", b.Health.State)
	}
	if !b.Config.Enabled {
		t.Error("after enable: Enabled should be true")
	}
}
func TestPauseResume(t *testing.T) {
cfg := makeTestConfig(time.Hour, 3, 2)
c := New(cfg)