when maglevd rehashes its config:

- when a backend is newly added or restarted, an event should fire; perhaps a transition to 'unknown'
- when a backend is removed, an event should fire; perhaps a transition to 'removed'
- when a backend is in the 'unknown' state, fast-interval is appropriate
This commit is contained in:
2026-04-11 02:16:08 +02:00
parent 530d85740e
commit 7ad183320c
3 changed files with 88 additions and 21 deletions

View File

@@ -33,6 +33,7 @@ const (
StateUp
StateDown
StatePaused
StateRemoved // backend was removed from configuration
)
func (s State) String() string {
@@ -45,6 +46,8 @@ func (s State) String() string {
return "down"
case StatePaused:
return "paused"
case StateRemoved:
return "removed"
default:
return "unknown"
}
@@ -118,7 +121,7 @@ func New(name string, addr net.IP, rise, fall int) *Backend {
// failure means the backend is not yet confirmed reachable), and to StateUp
// once the counter reaches Rise consecutive passes.
func (b *Backend) Record(r ProbeResult, maxHistory int) bool {
if b.State == StatePaused {
if b.State == StatePaused || b.State == StateRemoved {
return false
}
if r.OK {
@@ -157,12 +160,15 @@ func (b *Backend) Resume(maxHistory int) bool {
}
// NextInterval returns the appropriate probe interval based on state and counter:
// - Unknown (no probes yet): interval — probe promptly to establish initial state
// - Fully healthy (counter at max): interval
// - Fully down (counter at 0): downInterval (falls back to interval)
// - Degraded (anywhere in between): fastInterval (falls back to interval)
// - Unknown (initial / post-resume): fastInterval (falls back to interval) — probe quickly to establish state
// - Fully healthy (counter at max): interval
// - Fully down (counter at 0): downInterval (falls back to interval)
// - Degraded (anywhere in between): fastInterval (falls back to interval)
func (b *Backend) NextInterval(interval, fastInterval, downInterval time.Duration) time.Duration {
if b.State == StateUnknown {
if fastInterval > 0 {
return fastInterval
}
return interval
}
if b.Counter.Health == b.Counter.Max() {
@@ -180,6 +186,21 @@ func (b *Backend) NextInterval(interval, fastInterval, downInterval time.Duratio
return interval
}
// Start logs the opening StateUnknown transition for a backend that has just
// been created or restarted, tagging it with the probe code "start". On a
// fresh backend (already StateUnknown) the state value is unchanged; the call
// is there so the transition history has an entry and a reload event can be
// fired. The returned value is b.Transitions[0] as it stands after the call.
func (b *Backend) Start(maxHistory int) Transition {
	marker := ProbeResult{Code: "start"}
	b.transition(StateUnknown, marker, maxHistory)
	latest := b.Transitions[0]
	return latest
}
// Remove moves the backend into StateRemoved, recording the change with the
// probe code "removed", and hands back b.Transitions[0] as it stands after
// the call. Once a backend is in StateRemoved, Record ignores any further
// probe results.
func (b *Backend) Remove(maxHistory int) Transition {
	marker := ProbeResult{Code: "removed"}
	b.transition(StateRemoved, marker, maxHistory)
	latest := b.Transitions[0]
	return latest
}
// transition appends a new Transition and updates State.
func (b *Backend) transition(to State, r ProbeResult, maxHistory int) {
t := Transition{From: b.State, To: to, At: time.Now(), Result: r}

View File

@@ -169,9 +169,13 @@ func TestNextInterval(t *testing.T) {
b := New("test", net.ParseIP("10.0.0.1"), 2, 3) // max=4
// Unknown (no probes yet): always use interval, never downInterval.
if got := b.NextInterval(interval, fast, down); got != interval {
t.Errorf("StateUnknown: got %v, want %v (interval)", got, interval)
// Unknown: use fast-interval to establish state quickly.
if got := b.NextInterval(interval, fast, down); got != fast {
t.Errorf("StateUnknown with fast: got %v, want %v (fast)", got, fast)
}
// Unknown, no fast-interval configured: fall back to interval.
if got := b.NextInterval(interval, 0, down); got != interval {
t.Errorf("StateUnknown without fast: got %v, want %v (interval)", got, interval)
}
// After first fail: counter=0, state=Down → downInterval.
@@ -291,6 +295,40 @@ func TestTransitionTimestamp(t *testing.T) {
}
}
// TestStartRemove walks a backend through Start and Remove and then checks
// that a removed backend ignores further probe results.
func TestStartRemove(t *testing.T) {
	const maxHistory = 5
	be := newBackend()

	// Start on a fresh backend yields an unknown→unknown history entry and
	// leaves the state at unknown.
	started := be.Start(maxHistory)
	if !(started.From == StateUnknown && started.To == StateUnknown) {
		t.Errorf("Start transition: got %s→%s, want unknown→unknown", started.From, started.To)
	}
	if n := len(be.Transitions); n != 1 {
		t.Errorf("transitions after Start: got %d, want 1", n)
	}
	if be.State != StateUnknown {
		t.Errorf("state after Start: got %s, want unknown", be.State)
	}

	// Force the backend to "up" so the Remove transition has a distinct origin.
	be.State = StateUp
	removed := be.Remove(maxHistory)
	if !(removed.From == StateUp && removed.To == StateRemoved) {
		t.Errorf("Remove transition: got %s→%s, want up→removed", removed.From, removed.To)
	}
	if be.State != StateRemoved {
		t.Errorf("state after Remove: got %s, want removed", be.State)
	}

	// A passing probe must neither report a transition nor change the state.
	if transitioned := be.Record(pass(), maxHistory); transitioned {
		t.Error("Record should not transition a removed backend")
	}
	if be.State != StateRemoved {
		t.Errorf("state changed after Record on removed backend: %s", be.State)
	}
}
func TestStateString(t *testing.T) {
cases := []struct {
s State
@@ -300,6 +338,7 @@ func TestStateString(t *testing.T) {
{StateUp, "up"},
{StateDown, "down"},
{StatePaused, "paused"},
{StateRemoved, "removed"},
}
for _, c := range cases {
if c.s.String() != c.want {