From 56a4a6ba25d00330323f3cf376e6cc028f0fde6a Mon Sep 17 00:00:00 2001 From: Pim van Pelt Date: Sat, 11 Apr 2026 03:04:47 +0200 Subject: [PATCH] Some output tweaks; and some additional transition events upon resume (paused->unknown->{up|down}) --- cmd/maglevc/commands.go | 57 ++++++++++++++++++++++++++++++++--- internal/checker/checker.go | 24 ++++++++++++--- internal/health/state.go | 8 +++-- internal/health/state_test.go | 21 +++++-------- 4 files changed, 85 insertions(+), 25 deletions(-) diff --git a/cmd/maglevc/commands.go b/cmd/maglevc/commands.go index 355857d..c48da1d 100644 --- a/cmd/maglevc/commands.go +++ b/cmd/maglevc/commands.go @@ -5,8 +5,9 @@ package main import ( "context" "fmt" - "text/tabwriter" "os" + "strings" + "text/tabwriter" "time" "git.ipng.ch/ipng/vpp-maglev/internal/grpcapi" @@ -195,16 +196,27 @@ func runShowBackend(ctx context.Context, client grpcapi.MaglevClient, args []str w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) fmt.Fprintf(w, "name\t%s\n", info.Name) fmt.Fprintf(w, "address\t%s\n", info.Address) - fmt.Fprintf(w, "state\t%s\n", info.State) + stateDur := "" + if len(info.Transitions) > 0 { + since := time.Since(time.Unix(0, info.Transitions[0].AtUnixNs)) + stateDur = " for " + formatDuration(since) + } + fmt.Fprintf(w, "state\t%s%s\n", info.State, stateDur) fmt.Fprintf(w, "enabled\t%v\n", info.Enabled) fmt.Fprintf(w, "weight\t%d\n", info.Weight) fmt.Fprintf(w, "healthcheck\t%s\n", info.Healthcheck) for i, t := range info.Transitions { + ts := time.Unix(0, t.AtUnixNs) + label := "" if i == 0 { - fmt.Fprintf(w, "transitions\t%s → %s\n", t.From, t.To) - } else { - fmt.Fprintf(w, "\t%s → %s\n", t.From, t.To) + label = "transitions" } + fmt.Fprintf(w, "%s\t%s → %s\t%s\t%s\n", + label, + t.From, t.To, + ts.Format("2006-01-02 15:04:05.000"), + formatAgo(time.Since(ts)), + ) } return w.Flush() } @@ -305,3 +317,38 @@ func runNotImplemented(_ context.Context, _ grpcapi.MaglevClient, _ []string) er fmt.Println("not implemented yet") 
return nil } + +// formatDuration formats a duration as a compact string such as 1d2h3m4s: whole seconds only, zero-valued units omitted, and negative durations shown as 0s. +func formatDuration(d time.Duration) string { + if d < 0 { + d = 0 + } + d = d.Truncate(time.Second) + + days := int(d.Hours()) / 24 + d -= time.Duration(days) * 24 * time.Hour + hours := int(d.Hours()) + d -= time.Duration(hours) * time.Hour + minutes := int(d.Minutes()) + d -= time.Duration(minutes) * time.Minute + seconds := int(d.Seconds()) + + var b strings.Builder + if days > 0 { + fmt.Fprintf(&b, "%dd", days) + } + if hours > 0 { + fmt.Fprintf(&b, "%dh", hours) + } + if minutes > 0 { + fmt.Fprintf(&b, "%dm", minutes) + } + if seconds > 0 || b.Len() == 0 { + fmt.Fprintf(&b, "%ds", seconds) + } + return b.String() +} + +func formatAgo(d time.Duration) string { + return formatDuration(d) + " ago" +} diff --git a/internal/checker/checker.go b/internal/checker/checker.go index 065d861..888f64a 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -35,6 +35,7 @@ type worker struct { hc config.HealthCheck entry config.Backend cancel context.CancelFunc + wakeCh chan struct{} // signalled (non-blocking send) to interrupt probe sleep on resume } // Checker orchestrates health probing for all backends. 
@@ -236,8 +237,12 @@ func (c *Checker) PauseBackend(name string) (BackendSnapshot, bool) { } maxHistory := c.cfg.HealthChecker.TransitionHistory if w.backend.Pause(maxHistory) { - slog.Info("backend-pause", "backend", name) - c.emitForBackend(name, w.backend.Address, w.backend.Transitions[0], c.cfg.Frontends) + t := w.backend.Transitions[0] + slog.Info("backend-transition", "backend", name, + "from", t.From.String(), + "to", t.To.String(), + ) + c.emitForBackend(name, w.backend.Address, t, c.cfg.Frontends) } return BackendSnapshot{Health: w.backend, Config: w.entry}, true } @@ -252,8 +257,16 @@ func (c *Checker) ResumeBackend(name string) (BackendSnapshot, bool) { } maxHistory := c.cfg.HealthChecker.TransitionHistory if w.backend.Resume(maxHistory) { - slog.Info("backend-resume", "backend", name) - c.emitForBackend(name, w.backend.Address, w.backend.Transitions[0], c.cfg.Frontends) + t := w.backend.Transitions[0] + slog.Info("backend-transition", "backend", name, + "from", t.From.String(), + "to", t.To.String(), + ) + c.emitForBackend(name, w.backend.Address, t, c.cfg.Frontends) + select { + case w.wakeCh <- struct{}{}: + default: + } } return BackendSnapshot{Health: w.backend, Config: w.entry}, true } @@ -274,6 +287,7 @@ func (c *Checker) startWorker(ctx context.Context, name string, entry config.Bac hc: hc, entry: entry, cancel: cancel, + wakeCh: make(chan struct{}, 1), } w.backend.Start(maxHistory) c.workers[name] = w @@ -310,6 +324,7 @@ func (c *Checker) runProbe(ctx context.Context, name string, pos, total int) { entry := w.entry maxHistory := c.cfg.HealthChecker.TransitionHistory netns := c.cfg.HealthChecker.Netns + wakeCh := w.wakeCh var sleepFor time.Duration if entry.HealthCheck == "" { sleepFor = 30 * time.Second @@ -322,6 +337,7 @@ func (c *Checker) runProbe(ctx context.Context, name string, pos, total int) { case <-ctx.Done(): return case <-time.After(sleepFor): + case <-wakeCh: } var result health.ProbeResult diff --git a/internal/health/state.go 
b/internal/health/state.go index f00c30f..bd0ecc7 100644 --- a/internal/health/state.go +++ b/internal/health/state.go @@ -104,13 +104,15 @@ type Backend struct { Transitions []Transition // newest first, capped at maxHistory } -// New creates a Backend in StateUnknown. +// New creates a Backend in StateUnknown with the health counter pre-loaded to +// Rise-1, so the very first probe resolves the state: one pass → Up, any +// fail → Down (via the StateUnknown shortcut in Record). func New(name string, addr net.IP, rise, fall int) *Backend { return &Backend{ Name: name, Address: addr, State: StateUnknown, - Counter: HealthCounter{Rise: rise, Fall: fall}, + Counter: HealthCounter{Rise: rise, Fall: fall, Health: rise - 1}, } } @@ -155,7 +157,7 @@ func (b *Backend) Resume(maxHistory int) bool { return false } b.transition(StateUnknown, ProbeResult{}, maxHistory) - b.Counter.Health = 0 + b.Counter.Health = b.Counter.Rise - 1 return true } diff --git a/internal/health/state_test.go b/internal/health/state_test.go index 4ab2730..bd7dc18 100644 --- a/internal/health/state_test.go +++ b/internal/health/state_test.go @@ -23,12 +23,14 @@ func TestInitialState(t *testing.T) { if len(b.Transitions) != 0 { t.Errorf("initial transitions: got %d, want 0", len(b.Transitions)) } - if b.Counter.Health != 0 { - t.Errorf("initial counter health: got %d, want 0", b.Counter.Health) + // Counter pre-loaded to Rise-1 so first probe resolves state immediately. + if want := b.Counter.Rise - 1; b.Counter.Health != want { + t.Errorf("initial counter health: got %d, want %d (rise-1)", b.Counter.Health, want) } } -// TestRiseToUp: rise=2 passes from Down/Unknown → Up. +// TestRiseToUp: from Unknown/Down with counter pre-loaded to Rise-1, a single +// pass is enough to reach Up. 
func TestRiseToUp(t *testing.T) { tests := []struct { name string @@ -39,18 +41,11 @@ func TestRiseToUp(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - b := newBackend() + b := newBackend() // rise=2 → Health starts at Rise-1=1 b.State = tt.initialState - // First pass: counter=1, still in DOWN range (rise=2), no transition. - if b.Record(pass(), 5) { - t.Error("should not transition after 1 pass (rise=2)") - } - if b.State != tt.initialState { - t.Errorf("state changed early: got %s", b.State) - } - // Second pass: counter=2=rise, transitions to Up. + // Counter is already at Rise-1; one pass reaches Rise → Up. if !b.Record(pass(), 5) { - t.Error("should transition to Up after 2 passes") + t.Error("should transition to Up after 1 pass (counter pre-loaded to rise-1)") } if b.State != StateUp { t.Errorf("state: got %s, want up", b.State)