Add WatchEvents, enable/disable/weight RPCs, and config check

gRPC / proto
- Rename WatchBackendEvents → WatchEvents; return a stream of Event oneof (LogEvent, BackendEvent, FrontendEvent) with optional filter flags (log, log_level, backend, frontend)
- Add EnableBackend, DisableBackend, SetFrontendPoolBackendWeight RPCs
- Rename PauseResumeRequest → BackendRequest
- Add CheckConfig RPC returning ok/parse_error/semantic_error

maglevd
- Route slog through a LogBroadcaster (slog.Handler) so WatchEvents subscribers can receive structured log records independently of the daemon's own --log-level
- Add --reflection flag (default true) to toggle gRPC server reflection
- Add --check flag: validates config file and exits 0/1/2
- SIGHUP: use config.Check before applying reload; log parse vs semantic error separately; refuse reload on any error
- Rename default config path /etc/maglev → /etc/vpp-maglev

maglevc
- Add 'watch events [num <n>] [log [level <level>]] [backend] [frontend]' command; prints compact protojson, stops on any keypress or Ctrl-C; uses cbreak mode (not raw) so output post-processing is preserved
- Add 'set backend <name> enable|disable'
- Add 'set frontend <name> pool <pool> backend <name> weight <0-100>'
- Add 'config check' command

Debian packaging
- Rename service unit to vpp-maglevd.service
- Rename conffiles to /etc/default/vpp-maglev and /etc/vpp-maglev/
- Create maglevd system user/group in postinst; add to vpp group if present
- Add postrm; add adduser to Depends
2026-04-11 16:42:11 +02:00
parent d612086a5f
commit 58391f5463
26 changed files with 1969 additions and 400 deletions
--- a/internal/grpcapi/server.go
+++ b/internal/grpcapi/server.go
@@ -4,6 +4,7 @@ package grpcapi

 import (
 	"context"
+	"log/slog"
 	"net"

 	"google.golang.org/grpc/codes"
@@ -17,15 +18,20 @@ import (
 // Server implements the MaglevServer gRPC interface.
 type Server struct {
 	UnimplementedMaglevServer
-	ctx     context.Context
-	checker *checker.Checker
+	ctx        context.Context  // bounds the lifetime of streaming RPCs
+	checker    *checker.Checker // serves the backend, frontend and health-check RPCs
+	logs       *LogBroadcaster  // log source for WatchEvents; may be nil
+	configPath string           // file validated by CheckConfig
 }

-// NewServer creates a Server backed by the given Checker. The provided context
-// controls the lifetime of streaming RPCs: cancelling it closes all active
-// WatchBackendEvents streams so that grpc.Server.GracefulStop can complete.
-func NewServer(ctx context.Context, c *checker.Checker) *Server {
-	return &Server{ctx: ctx, checker: c}
+// NewServer creates a Server backed by the given Checker. logs may be nil, in
+// which case log events are never sent to WatchEvents streams. configPath is
+// the file CheckConfig re-reads and validates on demand; the parsed result is
+// checked only, never applied. ctx is stored on the Server and controls the
+// lifetime of streaming RPCs: cancelling it closes all active WatchEvents
+// streams so that grpc.Server.GracefulStop can complete.
+func NewServer(ctx context.Context, c *checker.Checker, logs *LogBroadcaster, configPath string) *Server {
+	return &Server{ctx: ctx, checker: c, logs: logs, configPath: configPath}
 }

 // ListFrontends returns the names of all configured frontends.
@@ -57,7 +63,7 @@ func (s *Server) GetBackend(_ context.Context, req *GetBackendRequest) (*Backend
 }

 // PauseBackend pauses health checking for a backend by name.
-func (s *Server) PauseBackend(_ context.Context, req *PauseResumeRequest) (*BackendInfo, error) {
+func (s *Server) PauseBackend(_ context.Context, req *BackendRequest) (*BackendInfo, error) {
 	b, ok := s.checker.PauseBackend(req.Name)
 	if !ok {
 		return nil, status.Errorf(codes.NotFound, "backend %q not found", req.Name)
@@ -66,7 +72,7 @@ func (s *Server) PauseBackend(_ context.Context, req *PauseResumeRequest) (*Back
 }

 // ResumeBackend resumes health checking for a backend by name.
-func (s *Server) ResumeBackend(_ context.Context, req *PauseResumeRequest) (*BackendInfo, error) {
+func (s *Server) ResumeBackend(_ context.Context, req *BackendRequest) (*BackendInfo, error) {
 	b, ok := s.checker.ResumeBackend(req.Name)
 	if !ok {
 		return nil, status.Errorf(codes.NotFound, "backend %q not found", req.Name)
@@ -74,6 +80,36 @@ func (s *Server) ResumeBackend(_ context.Context, req *PauseResumeRequest) (*Bac
 	return backendToProto(b), nil
 }

+// EnableBackend re-enables a previously disabled backend, identified by req.Name.
+func (s *Server) EnableBackend(_ context.Context, req *BackendRequest) (*BackendInfo, error) {
+	b, ok := s.checker.EnableBackend(req.Name)
+	if !ok { // the checker reported no backend under this name
+		return nil, status.Errorf(codes.NotFound, "backend %q not found", req.Name)
+	}
+	return backendToProto(b), nil // the backend as returned by the checker, in proto form
+}
+
+// DisableBackend disables the backend identified by req.Name, stopping its probe goroutine.
+func (s *Server) DisableBackend(_ context.Context, req *BackendRequest) (*BackendInfo, error) {
+	b, ok := s.checker.DisableBackend(req.Name)
+	if !ok { // the checker reported no backend under this name
+		return nil, status.Errorf(codes.NotFound, "backend %q not found", req.Name)
+	}
+	return backendToProto(b), nil // the backend as returned by the checker, in proto form
+}
+
+// SetFrontendPoolBackendWeight updates the weight of a backend in a frontend's pool; weights outside [0, 100] are rejected as InvalidArgument.
+func (s *Server) SetFrontendPoolBackendWeight(_ context.Context, req *SetWeightRequest) (*FrontendInfo, error) {
+	if req.Weight < 0 || req.Weight > 100 { // range is validated here, before the checker is consulted
+		return nil, status.Errorf(codes.InvalidArgument, "weight %d out of range [0, 100]", req.Weight)
+	}
+	fe, err := s.checker.SetFrontendPoolBackendWeight(req.Frontend, req.Pool, req.Backend, int(req.Weight))
+	if err != nil { // NOTE(review): every checker error is reported as NotFound — confirm none signal another kind of failure
+		return nil, status.Errorf(codes.NotFound, "%v", err)
+	}
+	return frontendToProto(req.Frontend, fe), nil // respond with the frontend as returned by the checker
+}
+
 // ListHealthChecks returns the names of all configured health checks.
 func (s *Server) ListHealthChecks(_ context.Context, _ *ListHealthChecksRequest) (*ListHealthChecksResponse, error) {
 	return &ListHealthChecksResponse{Names: s.checker.ListHealthChecks()}, nil
@@ -88,30 +124,55 @@ func (s *Server) GetHealthCheck(_ context.Context, req *GetHealthCheckRequest) (
 	return healthCheckToProto(req.Name, hc), nil
 }

-// WatchBackendEvents streams the current state of all backends on connect, then
-// streams live state transitions until the client disconnects.
-func (s *Server) WatchBackendEvents(_ *WatchRequest, stream Maglev_WatchBackendEventsServer) error {
-	// Send current state of all backends as synthetic events.
-	for _, name := range s.checker.ListBackends() {
-		snap, ok := s.checker.GetBackend(name)
-		if !ok {
-			continue
-		}
-		ev := &BackendEvent{
-			BackendName: name,
-			Transition: &TransitionRecord{
-				From:     snap.Health.State.String(),
-				To:       snap.Health.State.String(),
-				AtUnixNs: 0,
-			},
-		}
-		if err := stream.Send(ev); err != nil {
-			return err
+// WatchEvents streams events to the client, selected by the filter flags in
+// req: an unset (nil) flag defaults to true (subscribe) and an empty log_level
+// defaults to "info"; an unparsable log_level is rejected as InvalidArgument.
+// If backend events are wanted, each backend's current state is sent first as a synthetic BackendEvent.
+func (s *Server) WatchEvents(req *WatchRequest, stream Maglev_WatchEventsServer) error {
+	wantLog := req.Log == nil || *req.Log
+	wantBackend := req.Backend == nil || *req.Backend
+	wantFrontend := req.Frontend == nil || *req.Frontend
+	_ = wantFrontend // no frontend events emitted yet
+
+	logLevel := slog.LevelInfo
+	if req.LogLevel != "" {
+		if err := logLevel.UnmarshalText([]byte(req.LogLevel)); err != nil {
+			return status.Errorf(codes.InvalidArgument, "invalid log_level %q: must be debug, info, warn, or error", req.LogLevel)
 		}
 	}

-	ch, unsub := s.checker.Subscribe()
-	defer unsub()
+	// Subscribe to log events only if wanted and a broadcaster exists; otherwise logCh stays nil and its select case never fires.
+	var logCh <-chan *LogEvent
+	if wantLog && s.logs != nil {
+		var unsub func()
+		logCh, unsub = s.logs.Subscribe(logLevel)
+		defer unsub()
+	}
+
+	// Send the initial state snapshot, then subscribe to backend events. NOTE(review): transitions between the two steps look undelivered — confirm.
+	var backendCh <-chan checker.Event
+	if wantBackend {
+		for _, name := range s.checker.ListBackends() {
+			snap, ok := s.checker.GetBackend(name)
+			if !ok {
+				continue
+			}
+			ev := &Event{Event: &Event_Backend{Backend: &BackendEvent{
+				BackendName: name,
+				Transition: &TransitionRecord{
+					From:     snap.Health.State.String(),
+					To:       snap.Health.State.String(),
+					AtUnixNs: 0, // synthetic snapshot entry: From equals To and no transition time is set
+				},
+			}}}
+			if err := stream.Send(ev); err != nil {
+				return err
+			}
+		}
+		var unsub func()
+		backendCh, unsub = s.checker.Subscribe()
+		defer unsub()
+	}

 	for {
 		select {
@@ -119,21 +180,38 @@ func (s *Server) WatchBackendEvents(_ *WatchRequest, stream Maglev_WatchBackendE
 			return status.Error(codes.Unavailable, "server shutting down")
 		case <-stream.Context().Done():
 			return nil
-		case e, ok := <-ch:
+		case le, ok := <-logCh:
 			if !ok {
 				return nil
 			}
-			ev := &BackendEvent{
+			if err := stream.Send(&Event{Event: &Event_Log{Log: le}}); err != nil {
+				return err
+			}
+		case e, ok := <-backendCh:
+			if !ok { // backendCh was closed: end the stream without an error
+				return nil
+			}
+			if err := stream.Send(&Event{Event: &Event_Backend{Backend: &BackendEvent{
 				BackendName: e.BackendName,
 				Transition:  transitionToProto(e.Transition),
-			}
-			if err := stream.Send(ev); err != nil {
+			}}}); err != nil {
 				return err
 			}
 		}
 	}
 }

+// CheckConfig reads and validates the configuration file at s.configPath, returning
+// a structured result that distinguishes YAML parse errors from semantic errors.
+func (s *Server) CheckConfig(_ context.Context, _ *CheckConfigRequest) (*CheckConfigResponse, error) {
+	_, result := config.Check(s.configPath) // first return value is discarded; only the check result is used
+	return &CheckConfigResponse{
+		Ok:            result.OK(),
+		ParseError:    result.ParseError,
+		SemanticError: result.SemanticError,
+	}, nil // validation failures are carried in the response; the RPC error is always nil
+}
+
 // ---- conversion helpers ----------------------------------------------------

 func frontendToProto(name string, fe config.Frontend) *FrontendInfo {