From 74448cf6d0bca73725d1329b9561f690620d0ce9 Mon Sep 17 00:00:00 2001 From: Pim van Pelt Date: Sat, 11 Apr 2026 21:19:05 +0200 Subject: [PATCH] Guard pause/resume against disabled backends; clean up CLI errors - PauseBackend and ResumeBackend return an error (not bool) when the backend is disabled, preventing an inconsistent state where the health state says "paused" but enabled=false. - DisableBackend and EnableBackend now log uniform backend-transition lines with from/to instead of separate backend-disable/backend-enable messages. - CLI errors strip gRPC boilerplate ("rpc error: code = ... desc = ") and display the server message in red (when color is enabled). Both the interactive shell and one-shot mode use the same formatError path. --- cmd/maglevc/color.go | 19 ++++++++++++++++++ cmd/maglevc/main.go | 2 +- cmd/maglevc/shell.go | 2 +- internal/checker/checker.go | 33 +++++++++++++++++++++++--------- internal/checker/checker_test.go | 23 ++++++++++++++++------ internal/grpcapi/server.go | 12 ++++++------ 6 files changed, 68 insertions(+), 23 deletions(-) diff --git a/cmd/maglevc/color.go b/cmd/maglevc/color.go index 0ec843a..05a8935 100644 --- a/cmd/maglevc/color.go +++ b/cmd/maglevc/color.go @@ -2,8 +2,11 @@ package main +import "strings" + const ( ansiBlue = "\x1b[34m" + ansiRed = "\x1b[31m" ansiReset = "\x1b[0m" ) @@ -20,3 +23,19 @@ func label(s string) string { } return ansiBlue + s + ansiReset } + +// formatError returns a user-friendly error string. gRPC status errors are +// unwrapped to show only the server's message (no "rpc error: code = ..." +// boilerplate). The result is wrapped in red ANSI when color is enabled. +func formatError(err error) string { + msg := err.Error() + // google.golang.org/grpc/status errors format as: + // rpc error: code = desc = + if i := strings.Index(msg, " desc = "); i >= 0 { + msg = msg[i+len(" desc = "):] + } + if colorEnabled { + return ansiRed + msg + ansiReset + } + return msg +} diff --git a/cmd/maglevc/main.go b/cmd/maglevc/main.go index a11a798..ee59b5b 100644 --- a/cmd/maglevc/main.go +++ b/cmd/maglevc/main.go @@ -18,7 +18,7 @@ import ( func main() { if err := run(); err != nil { - fmt.Fprintf(os.Stderr, "error: %v\n", err) + fmt.Fprintf(os.Stderr, "%s\n", formatError(err)) os.Exit(1) } } diff --git a/cmd/maglevc/shell.go b/cmd/maglevc/shell.go index 8b2eb59..e5e065e 100644 --- a/cmd/maglevc/shell.go +++ b/cmd/maglevc/shell.go @@ -59,7 +59,7 @@ func runShell(ctx context.Context, client grpcapi.MaglevClient) error { if errors.Is(err, errQuit) { return nil } - fmt.Fprintf(rl.Stderr(), "error: %v\n", err) + fmt.Fprintf(rl.Stderr(), "%s\n", formatError(err)) } } } diff --git a/internal/checker/checker.go b/internal/checker/checker.go index e00c915..9e4298b 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -288,12 +288,16 @@ func (c *Checker) GetBackendInfo(name string) (metrics.BackendInfo, bool) { // goroutine is cancelled so no further traffic is sent to the backend. The // backend's state is set to paused and remains frozen until ResumeBackend is // called (which starts a fresh probe goroutine). -func (c *Checker) PauseBackend(name string) (BackendSnapshot, bool) { +// Returns an error if the backend is not found or is disabled. +func (c *Checker) PauseBackend(name string) (BackendSnapshot, error) { c.mu.Lock() defer c.mu.Unlock() w, ok := c.workers[name] if !ok { - return BackendSnapshot{}, false + return BackendSnapshot{}, fmt.Errorf("backend %q not found", name) + } + if !w.entry.Enabled { + return BackendSnapshot{}, fmt.Errorf("backend %q is disabled; enable it first", name) } maxHistory := c.cfg.HealthChecker.TransitionHistory if w.backend.Pause(maxHistory) { @@ -305,18 +309,22 @@ func (c *Checker) PauseBackend(name string) (BackendSnapshot, bool) { c.emitForBackend(name, w.backend.Address, t, c.cfg.Frontends) } w.cancel() - return BackendSnapshot{Health: w.backend, Config: w.entry}, true + return BackendSnapshot{Health: w.backend, Config: w.entry}, nil } // ResumeBackend resumes health checking for a backend by name. A fresh probe // goroutine is started and the backend re-enters StateUnknown. The existing // transition history is preserved. -func (c *Checker) ResumeBackend(name string) (BackendSnapshot, bool) { +// Returns an error if the backend is not found or is disabled. +func (c *Checker) ResumeBackend(name string) (BackendSnapshot, error) { c.mu.Lock() defer c.mu.Unlock() w, ok := c.workers[name] if !ok { - return BackendSnapshot{}, false + return BackendSnapshot{}, fmt.Errorf("backend %q not found", name) + } + if !w.entry.Enabled { + return BackendSnapshot{}, fmt.Errorf("backend %q is disabled; enable it first", name) } maxHistory := c.cfg.HealthChecker.TransitionHistory if w.backend.Resume(maxHistory) { @@ -333,7 +341,7 @@ func (c *Checker) ResumeBackend(name string) (BackendSnapshot, bool) { w.cancel = cancel w.wakeCh = make(chan struct{}, 1) go c.runProbe(wCtx, name, 0, 1) - return BackendSnapshot{Health: w.backend, Config: w.entry}, true + return BackendSnapshot{Health: w.backend, Config: w.entry}, nil } // DisableBackend stops health checking for a backend and removes it from active @@ -351,7 +359,10 @@ func (c *Checker) DisableBackend(name string) (BackendSnapshot, bool) { } maxHistory := c.cfg.HealthChecker.TransitionHistory t := w.backend.Disable(maxHistory) - slog.Info("backend-disable", "backend", name) + slog.Info("backend-transition", "backend", name, + "from", t.From.String(), + "to", t.To.String(), + ) c.emitForBackend(name, w.backend.Address, t, c.cfg.Frontends) w.cancel() w.entry.Enabled = false @@ -382,10 +393,14 @@ func (c *Checker) EnableBackend(name string) (BackendSnapshot, bool) { } maxHistory := c.cfg.HealthChecker.TransitionHistory hc := c.cfg.HealthChecks[entry.HealthCheck] - slog.Info("backend-enable", "backend", name) c.startWorker(c.runCtx, name, entry, hc, 0, 1, maxHistory) nw := c.workers[name] - c.emitForBackend(name, nw.backend.Address, nw.backend.Transitions[0], c.cfg.Frontends) + t := nw.backend.Transitions[0] + slog.Info("backend-transition", "backend", name, + "from", t.From.String(), + "to", t.To.String(), + ) + c.emitForBackend(name, nw.backend.Address, t, c.cfg.Frontends) return BackendSnapshot{Health: nw.backend, Config: nw.entry}, true } diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index d15b992..44e45aa 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -358,19 +358,30 @@ func TestPauseResume(t *testing.T) { } c.mu.Unlock() - b, ok := c.PauseBackend("be0") - if !ok { - t.Fatal("PauseBackend: not found") + b, err := c.PauseBackend("be0") + if err != nil { + t.Fatalf("PauseBackend: %v", err) } if b.Health.State != health.StatePaused { t.Errorf("after pause: %s", b.Health.State) } - b, ok = c.ResumeBackend("be0") - if !ok { - t.Fatal("ResumeBackend: not found") + b, err = c.ResumeBackend("be0") + if err != nil { + t.Fatalf("ResumeBackend: %v", err) } if b.Health.State != health.StateUnknown { t.Errorf("after resume: %s", b.Health.State) } + + // Pause/resume on a disabled backend must return an error. + c.mu.Lock() + c.workers["be0"].entry.Enabled = false + c.mu.Unlock() + if _, err := c.PauseBackend("be0"); err == nil { + t.Error("PauseBackend on disabled backend: expected error") + } + if _, err := c.ResumeBackend("be0"); err == nil { + t.Error("ResumeBackend on disabled backend: expected error") + } } diff --git a/internal/grpcapi/server.go b/internal/grpcapi/server.go index 30007cf..cf7b929 100644 --- a/internal/grpcapi/server.go +++ b/internal/grpcapi/server.go @@ -64,18 +64,18 @@ func (s *Server) GetBackend(_ context.Context, req *GetBackendRequest) (*Backend // PauseBackend pauses health checking for a backend by name. func (s *Server) PauseBackend(_ context.Context, req *BackendRequest) (*BackendInfo, error) { - b, ok := s.checker.PauseBackend(req.Name) - if !ok { - return nil, status.Errorf(codes.NotFound, "backend %q not found", req.Name) + b, err := s.checker.PauseBackend(req.Name) + if err != nil { + return nil, status.Errorf(codes.FailedPrecondition, "%v", err) } return backendToProto(b), nil } // ResumeBackend resumes health checking for a backend by name. func (s *Server) ResumeBackend(_ context.Context, req *BackendRequest) (*BackendInfo, error) { - b, ok := s.checker.ResumeBackend(req.Name) - if !ok { - return nil, status.Errorf(codes.NotFound, "backend %q not found", req.Name) + b, err := s.checker.ResumeBackend(req.Name) + if err != nil { + return nil, status.Errorf(codes.FailedPrecondition, "%v", err) } return backendToProto(b), nil }