Checker / reload:
- Reload's update-in-place branch now mirrors b.Address onto the
runtime health.Backend. Without this, GetBackend kept returning
the pre-reload address indefinitely after a config edit that
touched addresses but not healthcheck settings — the VPP sync
path reads cfg.Backends directly so the dataplane moved on
while the gRPC and SPA view stayed wedged on the old IPv4/IPv6.
Sync (internal/vpp/lbsync.go):
- reconcileVIP now detects encap mismatch in addition to
src-ip-sticky mismatch and takes the full tear-down / re-add
path via a new shared recreateVIP helper. Triggered when every
backend flips address family (gre4 <-> gre6) and the existing
VIP can no longer accept new ASes — previously the sync wedged
with 'Invalid address family' until a full maglevd restart.
- setASWeight is issued whenever the state machine requests
flush (a.Flush=true), not only on the weight-value transition
edge. Fixes the case where a backend reached StateDisabled
after its effective weight had already been drained to 0 by
pool failover — the sticky-cache entries pointing at it were
previously never cleared.
maglev-frontend:
- signal.Ignore(SIGHUP) so a controlling-terminal disconnect
doesn't kill the daemon.
- debian/vpp-maglev.service grants CAP_SYS_ADMIN in addition to
CAP_NET_RAW so setns(CLONE_NEWNET) can join the healthcheck
netns. Comment documents the 'operation not permitted' symptom
and notes the knob can be dropped if the deployment doesn't use
the 'netns:' healthcheck option.
LB plugin counters (internal/vpp/lbstats.go + friends):
- Fix the VIP counter regex: the LB plugin registers
vlib_simple_counter_main_t names without a leading '/'
(vlib_validate_simple_counter in counter.c:50 uses cm->name
verbatim; only entries that set cm->stat_segment_name get a
slash). first/next/untracked/no-server now read through as
live values instead of zero.
- Drop the per-backend FIB counter block end-to-end (proto,
grpcapi, metrics, vpp.Client, lbstats, maglevc). Traced from
lb/node.c:558 into ip{4,6}_forward.h:141 — the LB plugin
forwards by writing adj_index[VLIB_TX] directly and bypassing
ip{4,6}_lookup_inline, which is the only path that increments
lbm_to_counters. The backend's FIB load_balance stats_index
literally never ticks for LB-forwarded traffic, so the column
was always zero and misleading. docs/implementation/TODO
records the full investigation and the recommended upstream
path (new lb_as_stats_dump API message) for when we're ready
to carry that VPP patch.
- maglevc show vpp lb counters: plain-text tabular headers.
label() wraps strings in ANSI escapes (~11 bytes of overhead),
but tabwriter counts bytes, not rendered width — so a header
row with label()'d cells and data rows with plain cells drifts
column alignment on every row. color.go comment now spells
out the constraint: label() only works when column N is
wrapped identically in every row (key-value layouts are fine,
multi-column tables with header-only labelling are not).
SPA:
- stores/scope.ts is cookie-backed (maglev_scope, 1 year,
SameSite=Lax). App.tsx hydrates from the cookie then validates
against the fetched snapshots: a cookie referencing a maglevd
that no longer exists falls through to snaps[0] instead of
leaving the user on a ghost selection.
- components/Flash.tsx wraps props.value in createMemo. Solid's
on() fires its callback on every dep notification, not on
value change — source is right in solid-js/dist/solid.js:460,
no equality check. Without the memo, flipping scope between
two 'connected' maglevds (or any other cross-store reactive
re-eval that doesn't actually change the concrete string)
replays the animation every time. createMemo's default ===
dedupe fixes it in one place for every Flash consumer,
superseding the local createMemo workaround we'd added in
BackendRow earlier.
309 lines
9.6 KiB
Protocol Buffer
309 lines
9.6 KiB
Protocol Buffer
// gRPC API definition for maglevd (service Maglev and its messages).
syntax = "proto3";

package maglev;

option go_package = "git.ipng.ch/ipng/vpp-maglev/internal/grpcapi";
|
|
|
|
// Maglev exposes the state of backend health for all frontends, plus
// configuration and VPP load-balancer inspection calls.
service Maglev {
  // Frontend and backend inspection.
  rpc ListFrontends(ListFrontendsRequest) returns (ListFrontendsResponse);
  rpc GetFrontend(GetFrontendRequest) returns (FrontendInfo);
  rpc ListBackends(ListBackendsRequest) returns (ListBackendsResponse);
  rpc GetBackend(GetBackendRequest) returns (BackendInfo);

  // Backend administrative actions. All four share BackendRequest and
  // return a BackendInfo for the named backend.
  rpc PauseBackend(BackendRequest) returns (BackendInfo);
  rpc ResumeBackend(BackendRequest) returns (BackendInfo);
  rpc EnableBackend(BackendRequest) returns (BackendInfo);
  rpc DisableBackend(BackendRequest) returns (BackendInfo);

  // Health check inspection.
  rpc ListHealthChecks(ListHealthChecksRequest) returns (ListHealthChecksResponse);
  rpc GetHealthCheck(GetHealthCheckRequest) returns (HealthCheckInfo);

  // Changes the weight of one backend within one pool of a frontend; see
  // SetWeightRequest for the flush semantics.
  rpc SetFrontendPoolBackendWeight(SetWeightRequest) returns (FrontendInfo);

  // Server-streaming feed of log, backend and frontend events, filtered
  // by WatchRequest.
  rpc WatchEvents(WatchRequest) returns (stream Event);

  // Configuration validation and reload.
  rpc CheckConfig(CheckConfigRequest) returns (CheckConfigResponse);
  rpc ReloadConfig(ReloadConfigRequest) returns (ReloadConfigResponse);

  // VPP inspection, load-balancer state, reconciliation and counters.
  rpc GetVPPInfo(GetVPPInfoRequest) returns (VPPInfo);
  rpc GetVPPLBState(GetVPPLBStateRequest) returns (VPPLBState);
  rpc SyncVPPLBState(SyncVPPLBStateRequest) returns (SyncVPPLBStateResponse);
  rpc GetVPPLBCounters(GetVPPLBCountersRequest) returns (VPPLBCounters);
}
|
|
|
|
// ---- requests ---------------------------------------------------------------
|
|
|
|
// ListFrontendsRequest is the request for ListFrontends; it currently
// carries no fields.
message ListFrontendsRequest {}
|
|
|
|
// GetFrontendRequest is the request for GetFrontend.
message GetFrontendRequest {
  // Name of the frontend to look up.
  string name = 1;
}
|
|
|
|
// ListBackendsRequest is the request for ListBackends; it currently
// carries no fields.
message ListBackendsRequest {}
|
|
|
|
// GetBackendRequest is the request for GetBackend.
message GetBackendRequest {
  // Name of the backend to look up.
  string name = 1;
}
|
|
|
|
// BackendRequest is the shared request for PauseBackend, ResumeBackend,
// EnableBackend and DisableBackend.
message BackendRequest {
  // Name of the backend the action applies to.
  string name = 1;
}
|
|
|
|
// ListHealthChecksRequest is the request for ListHealthChecks; it
// currently carries no fields.
message ListHealthChecksRequest {}
|
|
|
|
// GetHealthCheckRequest is the request for GetHealthCheck.
message GetHealthCheckRequest {
  // Name of the health check to look up.
  string name = 1;
}
|
|
|
|
// CheckConfigRequest is the request for CheckConfig; it currently
// carries no fields.
message CheckConfigRequest {}
|
|
|
|
// CheckConfigResponse reports the outcome of CheckConfig.
message CheckConfigResponse {
  // NOTE(review): presumably true only when both error strings below are
  // empty — confirm against the server implementation.
  bool ok = 1;
  string parse_error = 2;     // set when YAML cannot be read or parsed
  string semantic_error = 3;  // set when YAML is valid but semantically incorrect
}
|
|
|
|
// ReloadConfigRequest is the request for ReloadConfig; it currently
// carries no fields.
message ReloadConfigRequest {}
|
|
|
|
// ReloadConfigResponse reports the outcome of ReloadConfig. It carries
// the same validation errors as CheckConfigResponse plus reload_error.
message ReloadConfigResponse {
  // NOTE(review): presumably true only when all error strings below are
  // empty — confirm against the server implementation.
  bool ok = 1;
  string parse_error = 2;     // set when YAML cannot be read or parsed
  string semantic_error = 3;  // set when YAML is valid but semantically incorrect
  string reload_error = 4;    // set when config is valid but the reload itself failed
}
|
|
|
|
// GetVPPInfoRequest is the request for GetVPPInfo; it currently carries
// no fields.
message GetVPPInfoRequest {}
|
|
|
|
// VPPInfo is returned by GetVPPInfo and describes the VPP instance that
// maglevd is connected to.
message VPPInfo {
  string version = 1;
  string build_date = 2;
  string build_directory = 3;
  uint32 pid = 4;
  int64 boottime_ns = 5;     // unix timestamp (ns) when VPP started (from /sys/boottime)
  int64 connecttime_ns = 6;  // unix timestamp (ns) when maglevd connected to VPP
}
|
|
|
|
// ---- VPP load-balancer state ------------------------------------------------
|
|
|
|
// GetVPPLBStateRequest is the request for GetVPPLBState; it currently
// carries no fields.
message GetVPPLBStateRequest {}
|
|
|
|
// VPPLBConf mirrors VPP's lb_conf_get_reply: global LB plugin settings.
message VPPLBConf {
  string ip4_src_address = 1;
  string ip6_src_address = 2;
  uint32 sticky_buckets_per_core = 3;
  // NOTE(review): unit is not stated here (presumably seconds, as in
  // VPP's LB plugin) — confirm.
  uint32 flow_timeout = 4;
}
|
|
|
|
// VPPLBAS is one application server attached to a VIP.
message VPPLBAS {
  string address = 1;
  uint32 weight = 2;          // 0-100
  uint32 flags = 3;           // VPP AS flags (bit 0 = used, bit 1 = flushed)
  uint32 num_buckets = 4;
  int64 in_use_since_ns = 5;  // unix timestamp (ns), 0 if never used
}
|
|
|
|
// VPPLBVIP mirrors VPP's lb_vip_details plus the attached application servers.
// Note: srv_type, dscp, and target_port are intentionally omitted — maglevd
// only supports GRE encap, so NAT/L3DSR-specific fields don't apply.
message VPPLBVIP {
  string prefix = 1;             // CIDR, e.g. 192.0.2.1/32
  uint32 protocol = 2;           // 6=TCP, 17=UDP, 255=any
  uint32 port = 3;               // 0 = all-port VIP
  string encap = 4;              // gre4|gre6|l3dsr|nat4|nat6
  uint32 flow_table_length = 5;
  repeated VPPLBAS application_servers = 6;
  bool src_ip_sticky = 7;        // source-IP based sticky session (scraped via cli_inband)
}
|
|
|
|
// VPPLBState is returned by GetVPPLBState: the global LB plugin settings
// together with every VIP and its application servers.
message VPPLBState {
  VPPLBConf conf = 1;
  repeated VPPLBVIP vips = 2;
}
|
|
|
|
// SyncVPPLBStateRequest triggers a reconciliation between the maglev config
// and the VPP load-balancer dataplane. When frontend_name is set, only that
// frontend's VIP is synced (SyncLBStateVIP) and no VIPs are removed. When
// unset, a full reconciliation runs (SyncLBStateAll), which will also remove
// stale VIPs from VPP.
message SyncVPPLBStateRequest {
  // Declared optional so that "unset" (full sync) is distinguishable from
  // an explicitly supplied name.
  optional string frontend_name = 1;
}
|
|
|
|
// SyncVPPLBStateResponse is the response of SyncVPPLBState; it currently
// carries no fields.
message SyncVPPLBStateResponse {}
|
|
|
|
// ---- VPP load-balancer runtime counters ------------------------------------
|
|
|
|
// GetVPPLBCountersRequest asks maglevd for the most recent per-VIP
// counter snapshot (there is no per-backend counter block; see
// VPPLBCounters). The data is served from an in-process cache that is
// refreshed every ~5 seconds server-side; the call itself is cheap and
// does not hit VPP.
message GetVPPLBCountersRequest {}
|
|
|
|
// VPPLBVIPCounters is the point-in-time counter row for a single VIP.
// next_packet, first_packet, untracked_packet and no_server are the LB
// plugin's SimpleCounters (packets only); the quoted strings below are
// the counter names as the plugin registers them, i.e. without a leading
// '/'. packets / bytes come from the VPP FIB's combined counter at the
// VIP's host prefix (/net/route/to).
message VPPLBVIPCounters {
  string prefix = 1;            // CIDR, e.g. 192.0.2.1/32
  string protocol = 2;          // tcp | udp | any
  uint32 port = 3;
  uint64 next_packet = 4;       // "packet from existing sessions"
  uint64 first_packet = 5;      // "first session packet"
  uint64 untracked_packet = 6;  // "untracked packet"
  uint64 no_server = 7;         // "no server configured"
  uint64 packets = 8;           // /net/route/to (FIB, summed across workers)
  uint64 bytes = 9;             // /net/route/to (FIB, summed across workers)
}
|
|
|
|
// VPPLBCounters wraps the per-VIP counter list returned by
// GetVPPLBCounters. There is no per-backend counter block: VPP's LB
// plugin forwarding node bypasses ip{4,6}_lookup_inline and writes
// adj_index[VLIB_TX] directly, so /net/route/to at a backend's FIB
// entry never ticks for LB-forwarded traffic — the per-VIP counters
// are the finest-grained signal VPP exposes today.
message VPPLBCounters {
  // Field 2 (repeated VPPLBBackendCounters backends) was removed; both
  // the number and the name are reserved so a future replacement doesn't
  // accidentally reuse either one (the name matters for JSON and
  // text-format compatibility).
  reserved 2;
  reserved "backends";

  repeated VPPLBVIPCounters vips = 1;
}
|
|
|
|
// SetWeightRequest is the request for SetFrontendPoolBackendWeight. It
// addresses one backend within one pool of one frontend.
message SetWeightRequest {
  string frontend = 1;
  string pool = 2;
  string backend = 3;
  int32 weight = 4;  // 0-100

  // flush, when true, also clears VPP's flow table for this backend
  // so existing sessions are torn down. When false (default), only
  // Maglev's new-bucket mapping is updated and live flows keep
  // draining to this backend.
  bool flush = 5;
}
|
|
|
|
// WatchRequest controls which event types are streamed. The three
// boolean selectors default to true when unset, and log_level defaults
// to info, so an empty request subscribes to everything at info level.
// The booleans are declared optional so that "unset" can be told apart
// from an explicit false.
message WatchRequest {
  optional bool log = 1;       // include log events (default: true)
  string log_level = 2;        // minimum log level: debug|info|warn|error (default: info)
  optional bool backend = 3;   // include backend transition events (default: true)
  optional bool frontend = 4;  // include frontend events (default: true)
}
|
|
|
|
// ---- responses --------------------------------------------------------------
|
|
|
|
// ListFrontendsResponse is returned by ListFrontends.
message ListFrontendsResponse {
  // Frontend names; each can be passed to GetFrontend.
  repeated string frontend_names = 1;
}
|
|
|
|
// PoolBackendInfo describes one backend's membership in a pool.
message PoolBackendInfo {
  string name = 1;
  int32 weight = 2;            // configured weight from YAML (0-100)
  int32 effective_weight = 3;  // state-aware weight after pool-failover logic
}
|
|
|
|
// PoolInfo is one named pool of a frontend together with its backends.
message PoolInfo {
  string name = 1;
  repeated PoolBackendInfo backends = 2;
}
|
|
|
|
// FrontendInfo describes one frontend and its pools. It is returned by
// GetFrontend and SetFrontendPoolBackendWeight.
message FrontendInfo {
  string name = 1;
  string address = 2;
  string protocol = 3;
  uint32 port = 4;
  repeated PoolInfo pools = 5;
  string description = 6;
  bool src_ip_sticky = 7;  // VPP LB uses src-IP-based stickiness for this VIP
}
|
|
|
|
// ListBackendsResponse is returned by ListBackends.
message ListBackendsResponse {
  // Backend names; each can be passed to GetBackend.
  repeated string backend_names = 1;
}
|
|
|
|
// ListHealthChecksResponse is returned by ListHealthChecks.
message ListHealthChecksResponse {
  // Health check names; each can be passed to GetHealthCheck.
  repeated string names = 1;
}
|
|
|
|
// HTTPCheckParams holds the HTTP-specific settings of a health check
// (see HealthCheckInfo.http).
message HTTPCheckParams {
  string path = 1;
  string host = 2;
  // NOTE(review): presumably an inclusive range of accepted response
  // status codes — confirm against the checker.
  int32 response_code_min = 3;
  int32 response_code_max = 4;
  string response_regexp = 5;
  string server_name = 6;
  bool insecure_skip_verify = 7;
}
|
|
|
|
// TCPCheckParams holds the TCP-specific settings of a health check
// (see HealthCheckInfo.tcp).
message TCPCheckParams {
  bool ssl = 1;
  string server_name = 2;
  bool insecure_skip_verify = 3;
}
|
|
|
|
// HealthCheckInfo describes one configured health check. It is returned
// by GetHealthCheck. All *_ns fields are expressed in nanoseconds.
message HealthCheckInfo {
  string name = 1;
  string type = 2;
  uint32 port = 3;
  string probe_ipv4_src = 4;
  string probe_ipv6_src = 5;
  int64 interval_ns = 6;
  int64 fast_interval_ns = 7;
  int64 down_interval_ns = 8;
  int64 timeout_ns = 9;
  int32 rise = 10;
  int32 fall = 11;
  // NOTE(review): presumably only the sub-message matching `type` is
  // populated — confirm against the server implementation.
  HTTPCheckParams http = 12;
  TCPCheckParams tcp = 13;
}
|
|
|
|
// BackendInfo describes one backend. It is returned by GetBackend and by
// the Pause/Resume/Enable/Disable backend RPCs.
message BackendInfo {
  string name = 1;
  string address = 2;
  string state = 3;
  repeated TransitionRecord transitions = 4;
  bool enabled = 5;
  // NOTE(review): presumably the name of the health check attached to
  // this backend (usable with GetHealthCheck) — confirm.
  string healthcheck = 6;
}
|
|
|
|
// TransitionRecord is a single state change: the state left, the state
// entered, and when it happened. It is used for both backend and
// frontend transitions.
message TransitionRecord {
  string from = 1;
  string to = 2;
  int64 at_unix_ns = 3;  // unix timestamp (ns) of the transition
}
|
|
|
|
// ---- event stream -----------------------------------------------------------
|
|
|
|
// LogAttr is a single key/value attribute from a structured log record.
message LogAttr {
  string key = 1;
  string value = 2;
}
|
|
|
|
// LogEvent carries a single structured log record.
message LogEvent {
  int64 at_unix_ns = 1;  // unix timestamp (ns) of the record
  string level = 2;
  string msg = 3;
  repeated LogAttr attrs = 4;
}
|
|
|
|
// BackendEvent is emitted on every backend state transition.
message BackendEvent {
  string backend_name = 1;
  TransitionRecord transition = 2;
}
|
|
|
|
// FrontendEvent is emitted when a frontend's aggregate state changes.
// Frontends have three states: unknown, up, down. See docs/healthchecks.md.
message FrontendEvent {
  string frontend_name = 1;
  TransitionRecord transition = 2;
}
|
|
|
|
// Event is the envelope returned by WatchEvents. Exactly one member of
// the oneof is set per streamed message.
message Event {
  oneof event {
    LogEvent log = 1;
    BackendEvent backend = 2;
    FrontendEvent frontend = 3;
  }
}
|