Guard pause/resume against disabled backends; clean up CLI errors
- PauseBackend and ResumeBackend return an error (not bool) when the
backend is disabled, preventing an inconsistent state where the
health state says "paused" but enabled=false.
- DisableBackend and EnableBackend now log uniform backend-transition
lines with from/to instead of separate backend-disable/backend-enable
messages.
- CLI errors strip gRPC boilerplate ("rpc error: code = ... desc = ")
and display the server message in red (when color is enabled). Both
the interactive shell and one-shot mode use the same formatError path.
This commit is contained in:
@@ -2,8 +2,11 @@
|
|||||||
|
|
||||||
package main
|
package main
|
||||||
|
|
||||||
|
import "strings"
|
||||||
|
|
||||||
const (
|
const (
|
||||||
ansiBlue = "\x1b[34m"
|
ansiBlue = "\x1b[34m"
|
||||||
|
ansiRed = "\x1b[31m"
|
||||||
ansiReset = "\x1b[0m"
|
ansiReset = "\x1b[0m"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -20,3 +23,19 @@ func label(s string) string {
|
|||||||
}
|
}
|
||||||
return ansiBlue + s + ansiReset
|
return ansiBlue + s + ansiReset
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// formatError returns a user-friendly error string. gRPC status errors are
|
||||||
|
// unwrapped to show only the server's message (no "rpc error: code = ..."
|
||||||
|
// boilerplate). The result is wrapped in red ANSI when color is enabled.
|
||||||
|
func formatError(err error) string {
|
||||||
|
msg := err.Error()
|
||||||
|
// google.golang.org/grpc/status errors format as:
|
||||||
|
// rpc error: code = <Code> desc = <message>
|
||||||
|
if i := strings.Index(msg, " desc = "); i >= 0 {
|
||||||
|
msg = msg[i+len(" desc = "):]
|
||||||
|
}
|
||||||
|
if colorEnabled {
|
||||||
|
return ansiRed + msg + ansiReset
|
||||||
|
}
|
||||||
|
return msg
|
||||||
|
}
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ import (
|
|||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
if err := run(); err != nil {
|
if err := run(); err != nil {
|
||||||
fmt.Fprintf(os.Stderr, "error: %v\n", err)
|
fmt.Fprintf(os.Stderr, "%s\n", formatError(err))
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -59,7 +59,7 @@ func runShell(ctx context.Context, client grpcapi.MaglevClient) error {
|
|||||||
if errors.Is(err, errQuit) {
|
if errors.Is(err, errQuit) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
fmt.Fprintf(rl.Stderr(), "error: %v\n", err)
|
fmt.Fprintf(rl.Stderr(), "%s\n", formatError(err))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -288,12 +288,16 @@ func (c *Checker) GetBackendInfo(name string) (metrics.BackendInfo, bool) {
|
|||||||
// goroutine is cancelled so no further traffic is sent to the backend. The
|
// goroutine is cancelled so no further traffic is sent to the backend. The
|
||||||
// backend's state is set to paused and remains frozen until ResumeBackend is
|
// backend's state is set to paused and remains frozen until ResumeBackend is
|
||||||
// called (which starts a fresh probe goroutine).
|
// called (which starts a fresh probe goroutine).
|
||||||
func (c *Checker) PauseBackend(name string) (BackendSnapshot, bool) {
|
// Returns an error if the backend is not found or is disabled.
|
||||||
|
func (c *Checker) PauseBackend(name string) (BackendSnapshot, error) {
|
||||||
c.mu.Lock()
|
c.mu.Lock()
|
||||||
defer c.mu.Unlock()
|
defer c.mu.Unlock()
|
||||||
w, ok := c.workers[name]
|
w, ok := c.workers[name]
|
||||||
if !ok {
|
if !ok {
|
||||||
return BackendSnapshot{}, false
|
return BackendSnapshot{}, fmt.Errorf("backend %q not found", name)
|
||||||
|
}
|
||||||
|
if !w.entry.Enabled {
|
||||||
|
return BackendSnapshot{}, fmt.Errorf("backend %q is disabled; enable it first", name)
|
||||||
}
|
}
|
||||||
maxHistory := c.cfg.HealthChecker.TransitionHistory
|
maxHistory := c.cfg.HealthChecker.TransitionHistory
|
||||||
if w.backend.Pause(maxHistory) {
|
if w.backend.Pause(maxHistory) {
|
||||||
@@ -305,18 +309,22 @@ func (c *Checker) PauseBackend(name string) (BackendSnapshot, bool) {
|
|||||||
c.emitForBackend(name, w.backend.Address, t, c.cfg.Frontends)
|
c.emitForBackend(name, w.backend.Address, t, c.cfg.Frontends)
|
||||||
}
|
}
|
||||||
w.cancel()
|
w.cancel()
|
||||||
return BackendSnapshot{Health: w.backend, Config: w.entry}, true
|
return BackendSnapshot{Health: w.backend, Config: w.entry}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ResumeBackend resumes health checking for a backend by name. A fresh probe
|
// ResumeBackend resumes health checking for a backend by name. A fresh probe
|
||||||
// goroutine is started and the backend re-enters StateUnknown. The existing
|
// goroutine is started and the backend re-enters StateUnknown. The existing
|
||||||
// transition history is preserved.
|
// transition history is preserved.
|
||||||
func (c *Checker) ResumeBackend(name string) (BackendSnapshot, bool) {
|
// Returns an error if the backend is not found or is disabled.
|
||||||
|
func (c *Checker) ResumeBackend(name string) (BackendSnapshot, error) {
|
||||||
c.mu.Lock()
|
c.mu.Lock()
|
||||||
defer c.mu.Unlock()
|
defer c.mu.Unlock()
|
||||||
w, ok := c.workers[name]
|
w, ok := c.workers[name]
|
||||||
if !ok {
|
if !ok {
|
||||||
return BackendSnapshot{}, false
|
return BackendSnapshot{}, fmt.Errorf("backend %q not found", name)
|
||||||
|
}
|
||||||
|
if !w.entry.Enabled {
|
||||||
|
return BackendSnapshot{}, fmt.Errorf("backend %q is disabled; enable it first", name)
|
||||||
}
|
}
|
||||||
maxHistory := c.cfg.HealthChecker.TransitionHistory
|
maxHistory := c.cfg.HealthChecker.TransitionHistory
|
||||||
if w.backend.Resume(maxHistory) {
|
if w.backend.Resume(maxHistory) {
|
||||||
@@ -333,7 +341,7 @@ func (c *Checker) ResumeBackend(name string) (BackendSnapshot, bool) {
|
|||||||
w.cancel = cancel
|
w.cancel = cancel
|
||||||
w.wakeCh = make(chan struct{}, 1)
|
w.wakeCh = make(chan struct{}, 1)
|
||||||
go c.runProbe(wCtx, name, 0, 1)
|
go c.runProbe(wCtx, name, 0, 1)
|
||||||
return BackendSnapshot{Health: w.backend, Config: w.entry}, true
|
return BackendSnapshot{Health: w.backend, Config: w.entry}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// DisableBackend stops health checking for a backend and removes it from active
|
// DisableBackend stops health checking for a backend and removes it from active
|
||||||
@@ -351,7 +359,10 @@ func (c *Checker) DisableBackend(name string) (BackendSnapshot, bool) {
|
|||||||
}
|
}
|
||||||
maxHistory := c.cfg.HealthChecker.TransitionHistory
|
maxHistory := c.cfg.HealthChecker.TransitionHistory
|
||||||
t := w.backend.Disable(maxHistory)
|
t := w.backend.Disable(maxHistory)
|
||||||
slog.Info("backend-disable", "backend", name)
|
slog.Info("backend-transition", "backend", name,
|
||||||
|
"from", t.From.String(),
|
||||||
|
"to", t.To.String(),
|
||||||
|
)
|
||||||
c.emitForBackend(name, w.backend.Address, t, c.cfg.Frontends)
|
c.emitForBackend(name, w.backend.Address, t, c.cfg.Frontends)
|
||||||
w.cancel()
|
w.cancel()
|
||||||
w.entry.Enabled = false
|
w.entry.Enabled = false
|
||||||
@@ -382,10 +393,14 @@ func (c *Checker) EnableBackend(name string) (BackendSnapshot, bool) {
|
|||||||
}
|
}
|
||||||
maxHistory := c.cfg.HealthChecker.TransitionHistory
|
maxHistory := c.cfg.HealthChecker.TransitionHistory
|
||||||
hc := c.cfg.HealthChecks[entry.HealthCheck]
|
hc := c.cfg.HealthChecks[entry.HealthCheck]
|
||||||
slog.Info("backend-enable", "backend", name)
|
|
||||||
c.startWorker(c.runCtx, name, entry, hc, 0, 1, maxHistory)
|
c.startWorker(c.runCtx, name, entry, hc, 0, 1, maxHistory)
|
||||||
nw := c.workers[name]
|
nw := c.workers[name]
|
||||||
c.emitForBackend(name, nw.backend.Address, nw.backend.Transitions[0], c.cfg.Frontends)
|
t := nw.backend.Transitions[0]
|
||||||
|
slog.Info("backend-transition", "backend", name,
|
||||||
|
"from", t.From.String(),
|
||||||
|
"to", t.To.String(),
|
||||||
|
)
|
||||||
|
c.emitForBackend(name, nw.backend.Address, t, c.cfg.Frontends)
|
||||||
return BackendSnapshot{Health: nw.backend, Config: nw.entry}, true
|
return BackendSnapshot{Health: nw.backend, Config: nw.entry}, true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -358,19 +358,30 @@ func TestPauseResume(t *testing.T) {
|
|||||||
}
|
}
|
||||||
c.mu.Unlock()
|
c.mu.Unlock()
|
||||||
|
|
||||||
b, ok := c.PauseBackend("be0")
|
b, err := c.PauseBackend("be0")
|
||||||
if !ok {
|
if err != nil {
|
||||||
t.Fatal("PauseBackend: not found")
|
t.Fatalf("PauseBackend: %v", err)
|
||||||
}
|
}
|
||||||
if b.Health.State != health.StatePaused {
|
if b.Health.State != health.StatePaused {
|
||||||
t.Errorf("after pause: %s", b.Health.State)
|
t.Errorf("after pause: %s", b.Health.State)
|
||||||
}
|
}
|
||||||
|
|
||||||
b, ok = c.ResumeBackend("be0")
|
b, err = c.ResumeBackend("be0")
|
||||||
if !ok {
|
if err != nil {
|
||||||
t.Fatal("ResumeBackend: not found")
|
t.Fatalf("ResumeBackend: %v", err)
|
||||||
}
|
}
|
||||||
if b.Health.State != health.StateUnknown {
|
if b.Health.State != health.StateUnknown {
|
||||||
t.Errorf("after resume: %s", b.Health.State)
|
t.Errorf("after resume: %s", b.Health.State)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Pause/resume on a disabled backend must return an error.
|
||||||
|
c.mu.Lock()
|
||||||
|
c.workers["be0"].entry.Enabled = false
|
||||||
|
c.mu.Unlock()
|
||||||
|
if _, err := c.PauseBackend("be0"); err == nil {
|
||||||
|
t.Error("PauseBackend on disabled backend: expected error")
|
||||||
|
}
|
||||||
|
if _, err := c.ResumeBackend("be0"); err == nil {
|
||||||
|
t.Error("ResumeBackend on disabled backend: expected error")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -64,18 +64,18 @@ func (s *Server) GetBackend(_ context.Context, req *GetBackendRequest) (*Backend
|
|||||||
|
|
||||||
// PauseBackend pauses health checking for a backend by name.
|
// PauseBackend pauses health checking for a backend by name.
|
||||||
func (s *Server) PauseBackend(_ context.Context, req *BackendRequest) (*BackendInfo, error) {
|
func (s *Server) PauseBackend(_ context.Context, req *BackendRequest) (*BackendInfo, error) {
|
||||||
b, ok := s.checker.PauseBackend(req.Name)
|
b, err := s.checker.PauseBackend(req.Name)
|
||||||
if !ok {
|
if err != nil {
|
||||||
return nil, status.Errorf(codes.NotFound, "backend %q not found", req.Name)
|
return nil, status.Errorf(codes.FailedPrecondition, "%v", err)
|
||||||
}
|
}
|
||||||
return backendToProto(b), nil
|
return backendToProto(b), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// ResumeBackend resumes health checking for a backend by name.
|
// ResumeBackend resumes health checking for a backend by name.
|
||||||
func (s *Server) ResumeBackend(_ context.Context, req *BackendRequest) (*BackendInfo, error) {
|
func (s *Server) ResumeBackend(_ context.Context, req *BackendRequest) (*BackendInfo, error) {
|
||||||
b, ok := s.checker.ResumeBackend(req.Name)
|
b, err := s.checker.ResumeBackend(req.Name)
|
||||||
if !ok {
|
if err != nil {
|
||||||
return nil, status.Errorf(codes.NotFound, "backend %q not found", req.Name)
|
return nil, status.Errorf(codes.FailedPrecondition, "%v", err)
|
||||||
}
|
}
|
||||||
return backendToProto(b), nil
|
return backendToProto(b), nil
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user