diff --git a/Makefile b/Makefile index 746b6c8..36d36f6 100644 --- a/Makefile +++ b/Makefile @@ -35,7 +35,26 @@ TEST ?= tests/ VPP_API_DIR ?= $(HOME)/src/vpp/build-root/install-vpp_debug-native/vpp/share/vpp/api -.PHONY: all build build-amd64 build-arm64 test proto vpp-binapi lint fixstyle fixstyle-web pkg-deb robot-test clean maglevd-frontend-web +# GO_VERSION is what install-deps-go downloads from go.dev when the +# system Go is missing or older than this. Debian Trixie only ships +# golang-go 1.24 (main), and go.mod requires 1.25+, so the `apt install +# golang-go` path isn't sufficient — we fall back to the upstream +# tarball in /usr/local/go. Override on the command line to pull a +# specific patch release: make install-deps GO_VERSION=1.25.5 +GO_VERSION ?= 1.25.0 + +# GOLANGCI_LINT_VERSION is the minimum golangci-lint version that +# install-deps-go-tools accepts. Raised to 1.64.0 because earlier +# releases don't understand Go 1.25 syntax (1.64 is the last v1 line +# and shipped Go 1.25 support; any v2.x release satisfies the floor +# trivially via version sort). install-deps-go-tools always `go +# install`s @latest, then asserts the resulting binary reports a +# version >= this floor as a sanity check. Override on the command +# line if you want to force a specific minimum, e.g. +# make install-deps GOLANGCI_LINT_VERSION=2.0.0 +GOLANGCI_LINT_VERSION ?= 1.64.0 + +.PHONY: all build build-amd64 build-arm64 test proto vpp-binapi lint fixstyle fixstyle-web pkg-deb robot-test clean maglevd-frontend-web install-deps install-deps-apt install-deps-go install-deps-go-tools all: build @@ -110,6 +129,116 @@ fixstyle-web: lint: golangci-lint run ./... +# install-deps is an opt-in "set up a fresh developer box" target. Tested +# on Debian Trixie; the apt half should also work on Bookworm and recent +# Ubuntu LTS. 
Splits into three sub-targets so they can be run individually: +# +# install-deps-apt — Debian-packaged build-time deps (nodejs, npm, +# protoc, git, make, dpkg-dev, curl). +# install-deps-go — ensure a Go toolchain >= $(GO_VERSION) is on +# the system. Downloads the upstream tarball +# into /usr/local/go when the system Go is +# missing or older than the go.mod floor. +# install-deps-go-tools — `go install` the helpers this repo needs +# (protoc-gen-go, protoc-gen-go-grpc, golangci- +# lint) and assert golangci-lint is new enough +# to understand Go 1.25 syntax. +# +# Each sub-target is idempotent and safe to re-run. +install-deps: install-deps-apt install-deps-go install-deps-go-tools + @echo "" + @echo "==> All build dependencies installed." + @echo " Make sure these are on PATH:" + @echo " /usr/local/go/bin (Go toolchain)" + @echo " \$$(go env GOPATH)/bin (protoc-gen-go, golangci-lint, ...)" + +install-deps-apt: + @set -eu; \ + if [ "$$(id -u)" = 0 ]; then SUDO=""; else SUDO="sudo"; fi; \ + echo "==> Installing apt packages (nodejs, npm, protoc, git, make, dpkg-dev)"; \ + $$SUDO apt-get update; \ + $$SUDO apt-get install -y --no-install-recommends \ + nodejs npm protobuf-compiler git make dpkg-dev \ + ca-certificates curl tar + +# install-deps-go short-circuits when go env GOVERSION already reports a +# version >= GO_VERSION. Otherwise it downloads the official upstream +# tarball (https://go.dev/dl/) and extracts it to /usr/local/go, matching +# the layout that go.dev recommends and that most Debian setups use for +# "Go newer than apt provides". 
+install-deps-go: + @set -eu; \ + if [ "$$(id -u)" = 0 ]; then SUDO=""; else SUDO="sudo"; fi; \ + echo "==> Checking Go toolchain (required: $(GO_VERSION)+)"; \ + if command -v go >/dev/null 2>&1; then \ + CURRENT=$$(go env GOVERSION 2>/dev/null | sed 's/^go//'); \ + OLDEST=$$(printf '%s\n%s\n' "$(GO_VERSION)" "$$CURRENT" | sort -V | head -n1); \ + if [ "$$OLDEST" = "$(GO_VERSION)" ] && [ -n "$$CURRENT" ]; then \ + echo " go$$CURRENT already installed (>= $(GO_VERSION)), skipping."; \ + exit 0; \ + fi; \ + echo " go$$CURRENT is older than $(GO_VERSION), upgrading."; \ + else \ + echo " no Go toolchain on PATH, installing."; \ + fi; \ + DEB_ARCH=$$(dpkg --print-architecture); \ + case "$$DEB_ARCH" in \ + amd64) GOARCH=amd64 ;; \ + arm64) GOARCH=arm64 ;; \ + armhf) GOARCH=armv6l ;; \ + *) echo " unsupported architecture: $$DEB_ARCH" >&2; exit 1 ;; \ + esac; \ + TARBALL="go$(GO_VERSION).linux-$$GOARCH.tar.gz"; \ + URL="https://go.dev/dl/$$TARBALL"; \ + echo " downloading $$URL"; \ + curl -fsSL -o "/tmp/$$TARBALL" "$$URL"; \ + echo " installing to /usr/local/go"; \ + $$SUDO rm -rf /usr/local/go; \ + $$SUDO tar -C /usr/local -xzf "/tmp/$$TARBALL"; \ + rm -f "/tmp/$$TARBALL"; \ + echo " installed $$(/usr/local/go/bin/go version)" + +# install-deps-go-tools installs the three Go binaries this repo calls +# out to during `make proto` and `make lint`. protoc-gen-go and +# protoc-gen-go-grpc pin to specific upstream release branches; golangci- +# lint pulls @latest (the v2 install path) and then we assert the +# installed version parses as >= GOLANGCI_LINT_VERSION so a stale binary +# in $GOPATH/bin from a previous dev session doesn't silently get used +# against Go 1.25 code it can't parse. Run `make install-deps +# GOLANGCI_LINT_VERSION=2.0.0` if you want to enforce a tighter floor. +install-deps-go-tools: + @set -eu; \ + if ! 
command -v go >/dev/null 2>&1; then \ + export PATH="/usr/local/go/bin:$$PATH"; \ + fi; \ + echo "==> Installing Go tools via 'go install'"; \ + echo " google.golang.org/protobuf/cmd/protoc-gen-go"; \ + go install google.golang.org/protobuf/cmd/protoc-gen-go@latest; \ + echo " google.golang.org/grpc/cmd/protoc-gen-go-grpc"; \ + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest; \ + echo " github.com/golangci/golangci-lint/v2/cmd/golangci-lint"; \ + go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@latest; \ + GOBIN="$$(go env GOBIN)"; \ + if [ -z "$$GOBIN" ]; then GOBIN="$$(go env GOPATH)/bin"; fi; \ + echo "==> Asserting golangci-lint version >= $(GOLANGCI_LINT_VERSION)"; \ + if ! "$$GOBIN/golangci-lint" version >/dev/null 2>&1; then \ + echo " ERROR: $$GOBIN/golangci-lint is not executable" >&2; \ + exit 1; \ + fi; \ + INSTALLED=$$("$$GOBIN/golangci-lint" version 2>&1 | sed -En 's/.*has version v?([0-9][0-9.]*).*/\1/p' | head -n1); \ + if [ -z "$$INSTALLED" ]; then \ + echo " ERROR: could not parse golangci-lint version output" >&2; \ + "$$GOBIN/golangci-lint" version >&2; \ + exit 1; \ + fi; \ + OLDEST=$$(printf '%s\n%s\n' "$(GOLANGCI_LINT_VERSION)" "$$INSTALLED" | sort -V | head -n1); \ + if [ "$$OLDEST" != "$(GOLANGCI_LINT_VERSION)" ]; then \ + echo " ERROR: golangci-lint $$INSTALLED is older than the required $(GOLANGCI_LINT_VERSION)" >&2; \ + echo " The tool understands Go 1.25 syntax only from v1.64.0 / v2.x onward." >&2; \ + exit 1; \ + fi; \ + echo " golangci-lint $$INSTALLED (>= $(GOLANGCI_LINT_VERSION)) OK" + tests/.venv: tests/requirements.txt python3 -m venv tests/.venv tests/.venv/bin/pip install -q -r tests/requirements.txt diff --git a/README.md b/README.md index ba4a745..1d9022e 100644 --- a/README.md +++ b/README.md @@ -10,17 +10,18 @@ Debian package: over a gRPC API + Prometheus `/metrics` endpoint. - **`maglevc`** — the interactive CLI client. 
Tab-completing shell with inline help; also runs one-shot commands for scripting. -- **`maglevd-frontend`** — optional web dashboard. One binary with the - SolidJS SPA embedded via `//go:embed`; connects to one or more - maglevds over gRPC and serves a live HTTP view (read-only `/view/` - and optional basic-auth `/admin/`). +- **`maglevd-frontend`** — optional web dashboard. One binary with a + SolidJS Single-Page-App; connects to one or more maglevds over gRPC and + serves a live HTTP view (read-only `/view/` and optional basic-auth + `/admin/` with mutating commands). ## Build and install ```sh -make # builds build//{maglevd,maglevc,maglevd-frontend} -make test # runs all tests -make pkg-deb # creates a Debian package for amd64 and arm64 +make install-deps # installs all build-time dependencies +make # builds build// binaries +make test # runs all tests +make pkg-deb # creates a Debian package for amd64 and arm64 ``` Requires Go 1.25+ and (for `make proto`) `protoc` with `protoc-gen-go` @@ -66,7 +67,24 @@ maglevd-frontend -server localhost:9090 -listen :8080 ``` Send `SIGHUP` to `maglevd` to reload config without restarting. -`maglevd` requires `CAP_NET_RAW` for ICMP health checks. +`maglevd` requires: + +- `CAP_NET_RAW` for ICMP health checks (raw sockets). +- `CAP_SYS_ADMIN` when `healthchecker.netns` is set so probes can + `setns(CLONE_NEWNET)` into the dataplane namespace. Without it, + every probe errors out with `enter netns "": operation not + permitted`. + +The Debian systemd unit grants both via `AmbientCapabilities` / +`CapabilityBoundingSet`, so `systemctl start vpp-maglev` works out +of the box. When running by hand under a non-root user, grant them +via `setcap cap_net_raw,cap_sys_admin=eip /usr/sbin/maglevd` or +equivalent. + +`maglevd-frontend` also ignores `SIGHUP` so a controlling-terminal +disconnect (e.g. closing the SSH session it was started from) +doesn't kill the daemon; `SIGTERM` / `SIGINT` remain the clean +shutdown signals. 
Every flag on every binary also has an environment-variable equivalent (e.g. `MAGLEV_CONFIG`, `MAGLEV_GRPC_ADDR`, @@ -89,5 +107,11 @@ deployments. ```sh docker build -t maglevd . -docker run --cap-add NET_RAW -v /etc/vpp-maglev:/etc/vpp-maglev maglevd +docker run --cap-add NET_RAW \ + -v /etc/vpp-maglev:/etc/vpp-maglev maglevd + +# With netns-scoped health checks (maglev.yaml sets healthchecker.netns): +docker run --cap-add NET_RAW --cap-add SYS_ADMIN \ + -v /etc/vpp-maglev:/etc/vpp-maglev \ + -v /var/run/netns:/var/run/netns maglevd ``` diff --git a/cmd/frontend/handlers.go b/cmd/frontend/handlers.go index 1cb8805..5f176d7 100644 --- a/cmd/frontend/handlers.go +++ b/cmd/frontend/handlers.go @@ -302,7 +302,7 @@ func serveSSE(w http.ResponseWriter, r *http.Request, broker *Broker) { w.WriteHeader(http.StatusOK) // Reconnect hint: EventSource default is 3–5s; 2s feels livelier. - fmt.Fprintf(w, "retry: 2000\n\n") + _, _ = fmt.Fprintf(w, "retry: 2000\n\n") flusher.Flush() result := broker.Subscribe(r.Header.Get("Last-Event-ID")) @@ -311,7 +311,7 @@ func serveSSE(w http.ResponseWriter, r *http.Request, broker *Broker) { if result.NeedResync { // No id: line — the browser keeps whatever Last-Event-ID it had, // so subsequent reconnects compare against a real event ID. - fmt.Fprintf(w, "event: resync\ndata: {}\n\n") + _, _ = fmt.Fprintf(w, "event: resync\ndata: {}\n\n") flusher.Flush() } for _, ev := range result.ReplayEvents { diff --git a/cmd/maglevc/commands.go b/cmd/maglevc/commands.go index 36441a6..2c890c5 100644 --- a/cmd/maglevc/commands.go +++ b/cmd/maglevc/commands.go @@ -162,8 +162,7 @@ func buildTree() *Node { // All tokens after 'events' are captured as args via a self-referencing slot // node. This lets runWatchEvents parse the optional flags manually while still // providing tab-completion through the dynamic enumerator. 
- var watchEventsOptSlot *Node - watchEventsOptSlot = &Node{ + watchEventsOptSlot := &Node{ Word: "", Help: "Stream events with options", Dynamic: dynWatchEventOpts, @@ -268,18 +267,18 @@ func runShowVPPInfo(ctx context.Context, client grpcapi.MaglevClient, _ []string return err } w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) - fmt.Fprintf(w, "%s\t%s\n", label("version"), info.Version) - fmt.Fprintf(w, "%s\t%s\n", label("build-date"), info.BuildDate) - fmt.Fprintf(w, "%s\t%s\n", label("build-dir"), info.BuildDirectory) - fmt.Fprintf(w, "%s\t%d\n", label("vpp-pid"), info.Pid) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("version"), info.Version) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("build-date"), info.BuildDate) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("build-dir"), info.BuildDirectory) + _, _ = fmt.Fprintf(w, "%s\t%d\n", label("vpp-pid"), info.Pid) if info.BoottimeNs > 0 { bootTime := time.Unix(0, info.BoottimeNs) - fmt.Fprintf(w, "%s\t%s (%s)\n", label("vpp-boottime"), + _, _ = fmt.Fprintf(w, "%s\t%s (%s)\n", label("vpp-boottime"), bootTime.Format("2006-01-02 15:04:05"), formatDuration(time.Since(bootTime))) } connTime := time.Unix(0, info.ConnecttimeNs) - fmt.Fprintf(w, "%s\t%s (%s)\n", label("connected"), + _, _ = fmt.Fprintf(w, "%s\t%s (%s)\n", label("connected"), connTime.Format("2006-01-02 15:04:05"), formatDuration(time.Since(connTime))) return w.Flush() @@ -295,15 +294,15 @@ func runShowVPPLBState(ctx context.Context, client grpcapi.MaglevClient, _ []str // ---- global config ---- w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) - fmt.Fprintf(w, "%s\n", label("global")) + _, _ = fmt.Fprintf(w, "%s\n", label("global")) if state.Conf.Ip4SrcAddress != "" { - fmt.Fprintf(w, " %s\t%s\n", label("ip4-src"), state.Conf.Ip4SrcAddress) + _, _ = fmt.Fprintf(w, " %s\t%s\n", label("ip4-src"), state.Conf.Ip4SrcAddress) } if state.Conf.Ip6SrcAddress != "" { - fmt.Fprintf(w, " %s\t%s\n", label("ip6-src"), state.Conf.Ip6SrcAddress) + _, _ = fmt.Fprintf(w, " 
%s\t%s\n", label("ip6-src"), state.Conf.Ip6SrcAddress) } - fmt.Fprintf(w, " %s\t%d\n", label("sticky-buckets-per-core"), state.Conf.StickyBucketsPerCore) - fmt.Fprintf(w, " %s\t%ds\n", label("flow-timeout"), state.Conf.FlowTimeout) + _, _ = fmt.Fprintf(w, " %s\t%d\n", label("sticky-buckets-per-core"), state.Conf.StickyBucketsPerCore) + _, _ = fmt.Fprintf(w, " %s\t%ds\n", label("flow-timeout"), state.Conf.FlowTimeout) if err := w.Flush(); err != nil { return err } @@ -317,13 +316,13 @@ func runShowVPPLBState(ctx context.Context, client grpcapi.MaglevClient, _ []str for _, v := range state.Vips { fmt.Println() w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) - fmt.Fprintf(w, "%s\t%s\n", label("vip"), stripHostMask(v.Prefix)) - fmt.Fprintf(w, " %s\t%s\n", label("protocol"), protoString(v.Protocol)) - fmt.Fprintf(w, " %s\t%d\n", label("port"), v.Port) - fmt.Fprintf(w, " %s\t%s\n", label("encap"), v.Encap) - fmt.Fprintf(w, " %s\t%t\n", label("src-ip-sticky"), v.SrcIpSticky) - fmt.Fprintf(w, " %s\t%d\n", label("flow-table-length"), v.FlowTableLength) - fmt.Fprintf(w, " %s\t%d\n", label("application-servers"), len(v.ApplicationServers)) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("vip"), stripHostMask(v.Prefix)) + _, _ = fmt.Fprintf(w, " %s\t%s\n", label("protocol"), protoString(v.Protocol)) + _, _ = fmt.Fprintf(w, " %s\t%d\n", label("port"), v.Port) + _, _ = fmt.Fprintf(w, " %s\t%s\n", label("encap"), v.Encap) + _, _ = fmt.Fprintf(w, " %s\t%t\n", label("src-ip-sticky"), v.SrcIpSticky) + _, _ = fmt.Fprintf(w, " %s\t%d\n", label("flow-table-length"), v.FlowTableLength) + _, _ = fmt.Fprintf(w, " %s\t%d\n", label("application-servers"), len(v.ApplicationServers)) if err := w.Flush(); err != nil { return err } @@ -367,9 +366,9 @@ func runShowVPPLBCounters(ctx context.Context, client grpcapi.MaglevClient, _ [] // every packet count). 
fmt.Println(label("frontend-counters")) w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) - fmt.Fprintf(w, " vip\tproto\tport\tfirst\tnext\tuntracked\tno-server\tfib-packets\tfib-bytes\n") + _, _ = fmt.Fprintf(w, " vip\tproto\tport\tfirst\tnext\tuntracked\tno-server\tfib-packets\tfib-bytes\n") for _, v := range resp.Vips { - fmt.Fprintf(w, " %s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", + _, _ = fmt.Fprintf(w, " %s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", stripHostMask(v.Prefix), v.Protocol, v.Port, v.FirstPacket, v.NextPacket, v.UntrackedPacket, v.NoServer, @@ -458,16 +457,16 @@ func runShowFrontend(ctx context.Context, client grpcapi.MaglevClient, args []st } w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) - fmt.Fprintf(w, "%s\t%s\n", label("name"), info.Name) - fmt.Fprintf(w, "%s\t%s\n", label("address"), info.Address) - fmt.Fprintf(w, "%s\t%s\n", label("protocol"), info.Protocol) - fmt.Fprintf(w, "%s\t%d\n", label("port"), info.Port) - fmt.Fprintf(w, "%s\t%t\n", label("src-ip-sticky"), info.SrcIpSticky) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("name"), info.Name) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("address"), info.Address) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("protocol"), info.Protocol) + _, _ = fmt.Fprintf(w, "%s\t%d\n", label("port"), info.Port) + _, _ = fmt.Fprintf(w, "%s\t%t\n", label("src-ip-sticky"), info.SrcIpSticky) if info.Description != "" { - fmt.Fprintf(w, "%s\t%s\n", label("description"), info.Description) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("description"), info.Description) } if len(info.Pools) > 0 { - fmt.Fprintf(w, "%s\n", label("pools")) + _, _ = fmt.Fprintf(w, "%s\n", label("pools")) } if err := w.Flush(); err != nil { return err @@ -533,16 +532,16 @@ func runShowBackend(ctx context.Context, client grpcapi.MaglevClient, args []str return err } w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) - fmt.Fprintf(w, "%s\t%s\n", label("name"), info.Name) - fmt.Fprintf(w, "%s\t%s\n", label("address"), info.Address) + _, _ = 
fmt.Fprintf(w, "%s\t%s\n", label("name"), info.Name) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("address"), info.Address) stateDur := "" if len(info.Transitions) > 0 { since := time.Since(time.Unix(0, info.Transitions[0].AtUnixNs)) stateDur = " for " + formatDuration(since) } - fmt.Fprintf(w, "%s\t%s%s\n", label("state"), info.State, stateDur) - fmt.Fprintf(w, "%s\t%v\n", label("enabled"), info.Enabled) - fmt.Fprintf(w, "%s\t%s\n", label("healthcheck"), info.Healthcheck) + _, _ = fmt.Fprintf(w, "%s\t%s%s\n", label("state"), info.State, stateDur) + _, _ = fmt.Fprintf(w, "%s\t%v\n", label("enabled"), info.Enabled) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("healthcheck"), info.Healthcheck) for i, t := range info.Transitions { ts := time.Unix(0, t.AtUnixNs) var lbl string @@ -554,7 +553,7 @@ func runShowBackend(ctx context.Context, client grpcapi.MaglevClient, args []str // is identical on every row, keeping columns aligned). lbl = label(" ") } - fmt.Fprintf(w, "%s\t%s → %s\t%s\t%s\n", + _, _ = fmt.Fprintf(w, "%s\t%s → %s\t%s\t%s\n", lbl, t.From, t.To, ts.Format("2006-01-02 15:04:05.000"), @@ -588,41 +587,41 @@ func runShowHealthCheck(ctx context.Context, client grpcapi.MaglevClient, args [ return err } w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) - fmt.Fprintf(w, "%s\t%s\n", label("name"), info.Name) - fmt.Fprintf(w, "%s\t%s\n", label("type"), info.Type) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("name"), info.Name) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("type"), info.Type) if info.Port > 0 { - fmt.Fprintf(w, "%s\t%d\n", label("port"), info.Port) + _, _ = fmt.Fprintf(w, "%s\t%d\n", label("port"), info.Port) } - fmt.Fprintf(w, "%s\t%s\n", label("interval"), time.Duration(info.IntervalNs)) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("interval"), time.Duration(info.IntervalNs)) if info.FastIntervalNs > 0 { - fmt.Fprintf(w, "%s\t%s\n", label("fast-interval"), time.Duration(info.FastIntervalNs)) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("fast-interval"), 
time.Duration(info.FastIntervalNs)) } if info.DownIntervalNs > 0 { - fmt.Fprintf(w, "%s\t%s\n", label("down-interval"), time.Duration(info.DownIntervalNs)) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("down-interval"), time.Duration(info.DownIntervalNs)) } - fmt.Fprintf(w, "%s\t%s\n", label("timeout"), time.Duration(info.TimeoutNs)) - fmt.Fprintf(w, "%s\t%d\n", label("rise"), info.Rise) - fmt.Fprintf(w, "%s\t%d\n", label("fall"), info.Fall) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("timeout"), time.Duration(info.TimeoutNs)) + _, _ = fmt.Fprintf(w, "%s\t%d\n", label("rise"), info.Rise) + _, _ = fmt.Fprintf(w, "%s\t%d\n", label("fall"), info.Fall) if info.ProbeIpv4Src != "" { - fmt.Fprintf(w, "%s\t%s\n", label("probe-ipv4-src"), info.ProbeIpv4Src) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("probe-ipv4-src"), info.ProbeIpv4Src) } if info.ProbeIpv6Src != "" { - fmt.Fprintf(w, "%s\t%s\n", label("probe-ipv6-src"), info.ProbeIpv6Src) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("probe-ipv6-src"), info.ProbeIpv6Src) } if h := info.Http; h != nil { - fmt.Fprintf(w, "%s\t%s\n", label("http.path"), h.Path) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("http.path"), h.Path) if h.Host != "" { - fmt.Fprintf(w, "%s\t%s\n", label("http.host"), h.Host) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("http.host"), h.Host) } - fmt.Fprintf(w, "%s\t%d-%d\n", label("http.response-code"), h.ResponseCodeMin, h.ResponseCodeMax) + _, _ = fmt.Fprintf(w, "%s\t%d-%d\n", label("http.response-code"), h.ResponseCodeMin, h.ResponseCodeMax) if h.ResponseRegexp != "" { - fmt.Fprintf(w, "%s\t%s\n", label("http.response-regexp"), h.ResponseRegexp) + _, _ = fmt.Fprintf(w, "%s\t%s\n", label("http.response-regexp"), h.ResponseRegexp) } } if t := info.Tcp; t != nil { - fmt.Fprintf(w, "%s\t%v\n", label("tcp.ssl"), t.Ssl) + _, _ = fmt.Fprintf(w, "%s\t%v\n", label("tcp.ssl"), t.Ssl) if t.ServerName != "" { - fmt.Fprintf(w, "%s\t%s\n", label("tcp.server-name"), t.ServerName) + _, _ = fmt.Fprintf(w, "%s\t%s\n", 
label("tcp.server-name"), t.ServerName) } } return w.Flush() diff --git a/cmd/maglevc/complete.go b/cmd/maglevc/complete.go index 1d82b8d..a513b02 100644 --- a/cmd/maglevc/complete.go +++ b/cmd/maglevc/complete.go @@ -96,7 +96,9 @@ func (ql *questionListener) OnChange(line []rune, pos int, key rune) (newLine [] // "unknown" banner, then list what's available at the deepest // node we *did* reach so the operator can see what they could // have typed instead. The partial at the cursor is irrelevant - // once the left context is already broken. + // once the left context is already broken — no downstream + // branch reads it after we enter this branch, so we don't + // bother clearing it. consumed := prefix[:len(prefix)-len(remaining)] bad := remaining[0] if len(consumed) == 0 { @@ -105,7 +107,6 @@ func (ql *questionListener) OnChange(line []rune, pos int, key rune) (newLine [] unknownMsg = fmt.Sprintf("unknown subcommand %q after %q", bad, strings.Join(consumed, " ")) } displayPrefix = strings.Join(consumed, " ") - partial = "" } else if partial != "" { if next := matchFixedChild(node.Children, partial); next != nil { // Partial uniquely matched a fixed child — descend into it. @@ -152,22 +153,22 @@ func (ql *questionListener) OnChange(line []rune, pos int, key rune) (newLine [] // full "maglev> show vpp lb ?" ourselves as the first write — // that lands on the just-cleaned row, birdc-style, and the // subsequent Fprintfs each redraw a fresh prompt below the help. 
- fmt.Fprintf(ql.rl.Stderr(), "%s%s\r\n", ql.rl.Config.Prompt, string(line)) + _, _ = fmt.Fprintf(ql.rl.Stderr(), "%s%s\r\n", ql.rl.Config.Prompt, string(line)) if unknownMsg != "" { - fmt.Fprintf(ql.rl.Stderr(), " %s\r\n", unknownMsg) + _, _ = fmt.Fprintf(ql.rl.Stderr(), " %s\r\n", unknownMsg) } if len(lines) == 0 { - fmt.Fprintf(ql.rl.Stderr(), " \r\n") + _, _ = fmt.Fprintf(ql.rl.Stderr(), " \r\n") } else { for _, l := range lines { if l.help != "" { - fmt.Fprintf(ql.rl.Stderr(), "%-*s %s\r\n", maxLen+2, l.path, l.help) + _, _ = fmt.Fprintf(ql.rl.Stderr(), "%-*s %s\r\n", maxLen+2, l.path, l.help) } else { - fmt.Fprintf(ql.rl.Stderr(), "%s\r\n", l.path) + _, _ = fmt.Fprintf(ql.rl.Stderr(), "%s\r\n", l.path) } } if len(dynValues) > 0 { - fmt.Fprintf(ql.rl.Stderr(), " %s: %s\r\n", dynWord, strings.Join(dynValues, " ")) + _, _ = fmt.Fprintf(ql.rl.Stderr(), " %s: %s\r\n", dynWord, strings.Join(dynValues, " ")) } } diff --git a/cmd/maglevc/main.go b/cmd/maglevc/main.go index 1701c57..dac0e0c 100644 --- a/cmd/maglevc/main.go +++ b/cmd/maglevc/main.go @@ -43,7 +43,7 @@ func run() error { if err != nil { return fmt.Errorf("connect %s: %w", *serverAddr, err) } - defer conn.Close() + defer func() { _ = conn.Close() }() client := grpcapi.NewMaglevClient(conn) ctx := context.Background() diff --git a/cmd/maglevc/shell.go b/cmd/maglevc/shell.go index f4788a2..55c85bd 100644 --- a/cmd/maglevc/shell.go +++ b/cmd/maglevc/shell.go @@ -36,7 +36,7 @@ func runShell(ctx context.Context, client grpcapi.MaglevClient) error { return fmt.Errorf("readline init: %w", err) } ql.rl = rl - defer rl.Close() + defer func() { _ = rl.Close() }() for { line, err := rl.Readline() @@ -59,7 +59,7 @@ func runShell(ctx context.Context, client grpcapi.MaglevClient) error { if errors.Is(err, errQuit) { return nil } - fmt.Fprintf(rl.Stderr(), "%s\n", formatError(err)) + _, _ = fmt.Fprintf(rl.Stderr(), "%s\n", formatError(err)) } } } diff --git a/docs/config-guide.md b/docs/config-guide.md index 
991b9db..b1e07a3 100644 --- a/docs/config-guide.md +++ b/docs/config-guide.md @@ -57,6 +57,22 @@ Global settings for the health checker engine. empty or omitted, probes run in the current (default) network namespace. Useful when backends are reachable only through a dedicated dataplane namespace. + **Capability requirement**: setting this field makes `maglevd` call + `setns(CLONE_NEWNET)` on the probe thread before each probe, which the + kernel only permits to processes holding `CAP_SYS_ADMIN` in the target + namespace's user namespace (`setns(2)`). The Debian systemd unit + (`vpp-maglev.service`) already grants this capability; if you run + `maglevd` by hand under a non-root user make sure the binary has + `CAP_SYS_ADMIN` via `setcap cap_net_raw,cap_sys_admin=eip + /usr/sbin/maglevd` or equivalent, otherwise every probe fails with + `enter netns "": operation not permitted` and all backends + transition to `down` on their first probe. + + Also make sure the named namespace is mounted under `/var/run/netns/` + (which is where `ip netns add` puts it) and that it is readable by + the user `maglevd` runs as — the default mode from `ip netns add` is + `0644`, which is fine for any user. + Example: ```yaml maglev: diff --git a/docs/healthchecks.md b/docs/healthchecks.md index 8474d9e..c34faf2 100644 --- a/docs/healthchecks.md +++ b/docs/healthchecks.md @@ -88,6 +88,28 @@ recovering backend is re-evaluated quickly without waiting a full `interval`. Using `down-interval` for fully down backends reduces probe traffic to servers that are known to be offline. +### Jitter + +Every computed interval is then scaled by a uniformly-distributed random +factor in `[0.9, 1.1)` before the probe worker sleeps. The `±10%` jitter +prevents all probes from aligning on the same tick after a restart or a +config reload — a deployment with dozens of backends would otherwise send a +bursty, phase-locked flight of probes every `interval`. 
The jitter is +applied once per probe iteration, not averaged across iterations, so the +long-run cadence is still the configured `interval`. + +### Probe timing while a probe is in flight + +The probe worker loop is synchronous: each iteration blocks on the probe's +completion (or its `timeout`) before computing the next `sleepFor`. That +means a fully-timing-out probe effectively runs at +`timeout + fast-interval` cadence, not `fast-interval` cadence. If you +want fast fault detection against backends that hang rather than refuse +the connection (e.g. a dead TCP stack, or an unreachable backend via a +blackhole route), lower `timeout` rather than `fast-interval`. Setting +`fast-interval` below `timeout` doesn't make probes fire more frequently — +it just changes the idle gap between a completed probe and the next one. + --- ## Transition events diff --git a/docs/maglevd-frontend.8 b/docs/maglevd-frontend.8 index 0b53419..ab18692 100644 --- a/docs/maglevd-frontend.8 +++ b/docs/maglevd-frontend.8 @@ -67,6 +67,36 @@ and are set to non\-empty values at startup; otherwise .B /admin/ returns 404 and the SPA hides the admin\-toggle button entirely. +.PP +Per\-user persistent state lives in two cookies: +.B maglev_scope +remembers which maglevd the user was last looking at (hydrated on +page load and reconciled against the fetched server list, so a +removed/renamed maglevd falls through cleanly instead of leaving a +ghost selection), and +.B maglev_zippy_open +remembers which collapsible cards are open, scoped per\-maglevd so +opening a frontend card on one server doesn't affect the equivalent +card on another. Both are +.BR "Path=/; Max-Age=1y; SameSite=Lax" , +are best\-effort (a missing or corrupt value just falls back to +"everything closed" / "first maglevd"), and hold no sensitive data. 
+.PP +The SPA shows a health\-cascade icon next to every frontend name: +.B \(OK +for fully healthy, a double\-bang for a control\-plane vs dataplane +disagreement (eff_weight > 0 but zero VPP buckets), an exclamation +mark for a fully\-drained primary pool, a warning triangle for any +backend not in +.B up +state, and a question mark as a fallthrough for logic bugs in the +cascade. The +.B "lb buckets" +column on each backend row reports VPP's Maglev hash table share +for that AS, debounced to at most one +.B GetVPPLBState +fetch per second per maglevd and refreshed live on every backend +transition or weight edit. .SH OPTIONS Each flag may also be supplied via an environment variable (shown in parentheses); the flag takes precedence when both are set. All env @@ -154,6 +184,30 @@ Returns the fresh backend snapshot as JSON. Weight change POST. Body is .B {"weight": 0\-100, "flush": bool} . Returns the fresh frontend snapshot as JSON. +.SH SIGNALS +.TP +.BR SIGTERM ", " SIGINT +Graceful shutdown: active gRPC streams are closed, the HTTP server +drains, then the process exits. +.TP +.B SIGHUP +Explicitly ignored. A controlling\-terminal disconnect (closing the +SSH session the dashboard was started from, for example) would +otherwise deliver +.B SIGHUP +under Go's default handler and terminate the process with +.BR Hangup . +Since +.B maglevd\-frontend +has no config file beyond its command\-line flags there is nothing +meaningful to +.I reload +on +.BR SIGHUP , +and inheriting the default "exit on hangup" semantics is the wrong +behaviour for a long\-running network daemon. Use +.B SIGTERM +for clean shutdown instead. .SH REVERSE PROXY NOTES The SSE stream has a handful of operational requirements that every reverse proxy must satisfy: diff --git a/docs/maglevd.8 b/docs/maglevd.8 index c65625c..49fb003 100644 --- a/docs/maglevd.8 +++ b/docs/maglevd.8 @@ -36,11 +36,19 @@ default 30s), on reloads, and on operator request via .BR maglevc . 
.PP -The aggregated backend state, VPP dataplane state, and per\-VIP / -per\-backend stats\-segment counters are exposed via a gRPC API (and -scraped into Prometheus when the +The aggregated backend state, VPP dataplane state, and per\-VIP +stats\-segment counters are exposed via a gRPC API (and scraped +into Prometheus when the .B /metrics -endpoint is enabled). +endpoint is enabled). Per\-backend packet counters are intentionally +not exposed: VPP's LB plugin forwards by writing +.B adj_index[VLIB_TX] +directly and bypassing +.BR ip4_lookup_inline " / " ip6_lookup_inline , +which is the only path that increments +.BR /net/route/to , +so the backend's FIB entry stats index never ticks for LB\-forwarded +traffic. See .BR maglevc (1) for the interactive CLI client. @@ -94,6 +102,42 @@ immediately. Gracefully shut down: drain active gRPC streams, then exit. VPP dataplane state is left in place so that existing VIPs continue to forward traffic during a restart. +.SH CAPABILITIES +.TP +.B CAP_NET_RAW +Required when any health check uses +.BR "type: icmp" . +Raw sockets for ICMP echo. TCP and HTTP(S) checks use normal TCP +sockets and need no special capability. +.TP +.B CAP_SYS_ADMIN +Required when the +.B healthchecker.netns +field is set in the YAML configuration. The probe loop calls +.BR setns (2) +with +.B CLONE_NEWNET +to enter the target network namespace before each probe; the +kernel only permits that to processes holding +.B CAP_SYS_ADMIN +in the target namespace's user namespace. Without it, every probe +fails with +.B enter netns "": operation not permitted +and every backend flips to +.B down +on its first probe. Omit the capability when the deployment doesn't +use namespace\-scoped health checks \(em the Debian systemd unit +ships with both +.B CAP_NET_RAW +and +.B CAP_SYS_ADMIN +in its +.B AmbientCapabilities +and +.B CapabilityBoundingSet +by default, and operators can drop +.B CAP_SYS_ADMIN +via a drop\-in override if they prefer the narrower surface. 
.SH FILES .TP .I /etc/vpp-maglev/maglev.yaml diff --git a/docs/user-guide.md b/docs/user-guide.md index 9041a44..15fea34 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -32,9 +32,34 @@ are used for anything not set. ### Capabilities -`maglevd` requires `CAP_NET_RAW` when any health check uses `type: icmp`. -All other check types (`tcp`, `http`) use normal TCP sockets and require no -special capabilities. +`maglevd` requires: + +- **`CAP_NET_RAW`** when any health check uses `type: icmp` — raw + sockets for ICMP echo. `tcp`, `http`, and `https` checks use + normal TCP sockets and do not need this capability. +- **`CAP_SYS_ADMIN`** when `healthchecker.netns` is set in the + config — the probe loop calls `setns(CLONE_NEWNET)` to join the + target network namespace, and the kernel only permits that to + processes holding `CAP_SYS_ADMIN` in the target's user namespace + (see `setns(2)`). Without it the probe fails with + `enter netns "": operation not permitted` and every backend + flips to `down` / `L4CON` on its first probe. + +The Debian systemd unit (`vpp-maglev.service`) grants both via +`AmbientCapabilities` and `CapabilityBoundingSet`, so +`systemctl start vpp-maglev` works out of the box under the +unprivileged `maglevd` user. When running the binary by hand under +a non-root account, either: + +- `setcap cap_net_raw,cap_sys_admin=eip /usr/sbin/maglevd` once at + install time, or +- run under `systemd-run -p AmbientCapabilities='CAP_NET_RAW CAP_SYS_ADMIN' ...` + for ad-hoc tests. + +If your deployment doesn't use `netns:` at all, drop +`CAP_SYS_ADMIN` from the bounding set in the service unit — it's a +broad capability and there's no value in keeping it when nothing +calls `setns`. ### Logging @@ -139,14 +164,22 @@ show vpp lb state Show the VPP load-balancer plugin state: global configuration, configured VIPs, and their attached application servers (address, weight, bucket count). Returns an error if VPP is not connected. 
-show vpp lb counters Show per-VIP and per-backend packet/byte counters
- from the VPP stats segment, refreshed roughly every
- five seconds by maglevd. Each VIP row reports the LB
- plugin counters (next, first, untracked, no-server)
- and the FIB packets/bytes at the VIP's host prefix.
- Each backend row reports FIB packets/bytes at the
- backend's /32 or /128 prefix. Use Prometheus for
- live rates; this command shows absolute values.
+show vpp lb counters Show per-VIP packet/byte counters from the VPP stats
+ segment, refreshed roughly every five seconds by
+ maglevd. Each row reports the four LB plugin counters
+ (first, next, untracked, no-server) and the FIB
+ packets/bytes at the VIP's host prefix. Use Prometheus
+ for live rates; this command shows absolute values.
+
+ Per-backend packet counters are not shown: VPP's LB
+ plugin forwarding node writes adj_index[VLIB_TX]
+ directly and bypasses ip{4,6}_lookup_inline, which is
+ the only path that increments /net/route/to. The
+ backend's FIB load_balance stats_index therefore
+ never ticks for LB-forwarded traffic, and exposing
+ zeros would mislead. See docs/implementation/TODO
+ for the upstream path that would fix this (new
+ lb_as_stats_dump API message).
 
 sync vpp lb state [<name>] Reconcile the VPP load-balancer dataplane from
 the running config. Without a name: runs a full sync —
@@ -285,6 +318,79 @@
 the SPA's "admin…" toggle becomes visible. When either is missing or
 empty the `/admin/` route returns 404 and the SPA hides the toggle —
 `/view/` is always reachable read-only.
+
+### What the SPA shows
+
+After the dashboard loads, the header carries a **scope selector**:
+one pill per configured maglevd, coloured green when the frontend's
+gRPC channel to that maglevd is alive and red when it's dropped.
+Click a pill to flip the view to that maglevd's frontends.
Your
+selection is persisted in a `maglev_scope` cookie (Path=/;
+Max-Age=1y; SameSite=Lax), so the next page load lands on the same
+server you were last looking at. If the cookie references a
+maglevd that's no longer in the server list (it was removed from
+`-server` or renamed), the hydration path falls through to the
+first maglevd in the list instead of leaving you on a ghost
+selection.
+
+The **frontend list** is a stack of collapsible cards
+(`<details>
` elements) — one per VIP. Each card header shows a
+fixed-width slot carrying a health icon, the frontend name, its
+aggregate state badge (`up` / `down` / `unknown`), and the
+address, protocol, and description. The health icon is a cascade
+derived from the current backend state + VPP bucket allocation:
+
+| Icon | Meaning |
+|---|---|
+| ✅ | All backends `up`, the primary pool is serving, and every backend with `effective_weight > 0` has VPP buckets > 0. |
+| ‼️ | At least one backend has `effective_weight > 0` but zero VPP buckets — the control plane and dataplane disagree, almost always a bug worth investigating. |
+| ❗ | The primary pool has no serving backend (every pool[0] backend has `effective_weight = 0`); the VIP is running on its fallback or nothing at all. |
+| ⚠️ | At least one backend is not `up`, nothing worse. Typical maintenance / partial outage state. |
+| ❓ | Fallthrough; should be unreachable in practice and indicates a logic bug in the health-cascade code. |
+
+The card body is a table with one row per `(pool, backend)` tuple.
+Columns: `pool`, `backend`, `address`, `state`, `weight`,
+`effective`, `lb buckets`, `last transition`, and (in admin mode) a
+kebab `⋮` menu for per-backend actions. The **LB buckets** column
+reports VPP's Maglev hash table bucket count for that backend,
+refreshed live via a debounced `GetVPPLBState` scrape whenever a
+transition or weight edit happens (at most once per second per
+maglevd). A value of `0` means "in VPP but drained", `—` means
+"not in VPP at all" (e.g. between a sync and the next poll), and a
+non-zero number is the share of the 1024-bucket table currently
+pointing at that AS.
+
+Card open/closed state is also persisted per-panel in a
+`maglev_zippy_open` cookie, **scoped per maglevd** (the id is
+`frontend-<maglevd>-<frontend>`), so collapsing a card on
+`chbtl2` doesn't also collapse the equivalent card on `localhost`.
+On first load every card starts closed; unfolding one writes it to +the cookie for subsequent visits. The cookie is a best-effort hint +— a missing or corrupt value just falls back to "everything +closed", so losing it (browser clear, expiry, private window, etc.) +is purely cosmetic. + +When `admin_enabled` is true the header gains an **admin toggle** +that switches between `/view/` (read-only) and `/admin/` (basic +auth, mutation actions exposed). Inside admin mode every backend +row grows a `⋮` menu with `pause`, `resume`, `enable`, `disable`, +and `set weight…` entries. Lifecycle actions open a confirmation +dialog that spells out the dataplane consequence in plain English +(`disable` specifically calls out that it drops live sessions via +the flow-table flush). The weight dialog has a 0-100 slider and a +`flush existing flows` checkbox — unchecked is the graceful drain +(new flows move, existing ones finish naturally), checked is the +immediate session-drop path. + +Also visible in admin mode: a **Debug panel** at the bottom of the +page with a rolling tail of every event the SPA has seen across +all maglevds — `backend` and `frontend` transitions, log lines, +`maglevd-status` flips, `vpp-status` flips, and the VPP LB sync +events (`vpp-lb-sync-*`) with their full attribute set formatted +for scanning. A scope filter keeps the tail narrowed to the +current maglevd by default; a `all maglevds` checkbox flips it to +firehose mode, and a `pause` button freezes the tail so you can +read back. + ### HTTP surface - **`/view/`** — static SPA (dashboard). No authentication. 
diff --git a/internal/grpcapi/server_test.go b/internal/grpcapi/server_test.go index b1c6a5d..b2d79f1 100644 --- a/internal/grpcapi/server_test.go +++ b/internal/grpcapi/server_test.go @@ -71,7 +71,7 @@ func startTestServer(t *testing.T, ctx context.Context, c *checker.Checker) (Mag t.Fatalf("dial: %v", err) } return NewMaglevClient(conn), func() { - conn.Close() + _ = conn.Close() srv.Stop() } } diff --git a/internal/prober/http.go b/internal/prober/http.go index 4ad943f..dc1bf7a 100644 --- a/internal/prober/http.go +++ b/internal/prober/http.go @@ -69,7 +69,7 @@ func doHTTPProbe(ctx context.Context, cfg ProbeConfig, useTLS bool) health.Probe if useTLS { tlsConn := tls.Client(c, tlsConfig(p.ServerName, p.InsecureSkipVerify)) if err := tlsConn.HandshakeContext(ctx); err != nil { - c.Close() + _ = c.Close() return err } conn = tlsConn @@ -92,7 +92,7 @@ func doHTTPProbe(ctx context.Context, cfg ProbeConfig, useTLS bool) health.Probe } return health.ProbeResult{OK: false, Layer: health.LayerL4, Code: "L4CON", Detail: dialErr.Error()} } - defer conn.Close() + defer func() { _ = conn.Close() }() transport := &http.Transport{ DialContext: func(_ context.Context, _, _ string) (net.Conn, error) { @@ -122,7 +122,7 @@ func doHTTPProbe(ctx context.Context, cfg ProbeConfig, useTLS bool) health.Probe } return health.ProbeResult{OK: false, Layer: health.LayerL7, Code: "L7RSP", Detail: err.Error()} } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() if resp.StatusCode < p.ResponseCodeMin || resp.StatusCode > p.ResponseCodeMax { return health.ProbeResult{ diff --git a/internal/prober/http_test.go b/internal/prober/http_test.go index 4125d61..a6294d0 100644 --- a/internal/prober/http_test.go +++ b/internal/prober/http_test.go @@ -61,7 +61,7 @@ func dialAndProbe(ctx context.Context, addr string, cfg ProbeConfig) (bool, erro if err != nil { return false, err } - defer resp.Body.Close() + defer func() { _ = resp.Body.Close() }() if resp.StatusCode < 
p.ResponseCodeMin || resp.StatusCode > p.ResponseCodeMax { return false, nil @@ -78,7 +78,7 @@ func dialAndProbe(ctx context.Context, addr string, cfg ProbeConfig) (bool, erro func TestHTTPProbeStatusCode(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) - fmt.Fprint(w, "healthy") + _, _ = fmt.Fprint(w, "healthy") })) defer srv.Close() @@ -124,7 +124,7 @@ func TestHTTPProbeWrongStatusCode(t *testing.T) { func TestHTTPProbeRegexpMatch(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - fmt.Fprint(w, `{"status":"ok"}`) + _, _ = fmt.Fprint(w, `{"status":"ok"}`) })) defer srv.Close() @@ -148,7 +148,7 @@ func TestHTTPProbeRegexpMatch(t *testing.T) { func TestHTTPProbeRegexpNoMatch(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { - fmt.Fprint(w, `{"status":"degraded"}`) + _, _ = fmt.Fprint(w, `{"status":"degraded"}`) })) defer srv.Close() @@ -178,7 +178,7 @@ func TestHTTPSProbe(t *testing.T) { host, portStr, _ := net.SplitHostPort(srv.Listener.Addr().String()) port := uint16(0) - fmt.Sscanf(portStr, "%d", &port) + _, _ = fmt.Sscanf(portStr, "%d", &port) cfg := ProbeConfig{ Target: net.ParseIP(host), diff --git a/internal/prober/icmp.go b/internal/prober/icmp.go index b8c39ea..97a186f 100644 --- a/internal/prober/icmp.go +++ b/internal/prober/icmp.go @@ -44,7 +44,7 @@ func ICMPProbe(ctx context.Context, cfg ProbeConfig) health.ProbeResult { if err != nil { return fmt.Errorf("listen icmp (%s): %w", network, err) } - defer pc.Close() + defer func() { _ = pc.Close() }() id := rand.IntN(0xffff) + 1 seq := rand.IntN(0xffff) + 1 diff --git a/internal/prober/netns.go b/internal/prober/netns.go index 72afe31..b1c41b9 100644 --- a/internal/prober/netns.go +++ b/internal/prober/netns.go @@ -25,14 +25,14 @@ func inNetns(nsName string, fn func() error) error { if err != nil { return 
fmt.Errorf("get current netns: %w", err) } - defer origNs.Close() - defer netns.Set(origNs) //nolint:errcheck + defer func() { _ = origNs.Close() }() + defer func() { _ = netns.Set(origNs) }() targetNs, err := netns.GetFromName(nsName) if err != nil { return fmt.Errorf("get netns %q: %w", nsName, err) } - defer targetNs.Close() + defer func() { _ = targetNs.Close() }() if err := netns.Set(targetNs); err != nil { return fmt.Errorf("enter netns %q: %w", nsName, err) diff --git a/internal/prober/tcp.go b/internal/prober/tcp.go index ad4d6f9..1961544 100644 --- a/internal/prober/tcp.go +++ b/internal/prober/tcp.go @@ -49,16 +49,16 @@ func TCPProbe(ctx context.Context, cfg ProbeConfig) health.ProbeResult { } if !doTLS { - conn.Close() + _ = conn.Close() result = health.ProbeResult{OK: true, Layer: health.LayerL4, Code: "L4OK"} return nil } // TLS handshake. tlsConn := tls.Client(conn, tlsConfig(serverName, insecureSkipVerify)) - tlsConn.SetDeadline(time.Now().Add(cfg.Timeout)) //nolint:errcheck + _ = tlsConn.SetDeadline(time.Now().Add(cfg.Timeout)) if err := tlsConn.HandshakeContext(ctx); err != nil { - tlsConn.Close() + _ = tlsConn.Close() if isTimeout(err) { result = health.ProbeResult{OK: false, Layer: health.LayerL6, Code: "L6TOUT", Detail: err.Error()} } else { @@ -66,7 +66,7 @@ func TCPProbe(ctx context.Context, cfg ProbeConfig) health.ProbeResult { } return nil } - tlsConn.Close() + _ = tlsConn.Close() result = health.ProbeResult{OK: true, Layer: health.LayerL6, Code: "L6OK"} return nil })