From 5a7e2f77f15001fa3ad0fad227f9b04120df1332 Mon Sep 17 00:00:00 2001 From: Pim van Pelt Date: Thu, 16 Apr 2026 17:36:42 +0200 Subject: [PATCH] Add ngx_http_ipng_stats_module: per-VIP, per-device traffic counters Full implementation of the nginx dynamic module with: - SO_BINDTODEVICE-based per-interface traffic attribution - Per-worker lock-free counters flushed to shared memory - Prometheus text and JSON scrape endpoint at configurable location - UDP-only global logtail (ipng_stats_logtail) for fire-and-forget access log streaming - $ipng_source_tag nginx variable for use in log_format/map - Histogram buckets, EWMA rate gauges, zone meta-metrics - Debian packaging (libnginx-mod-http-ipng-stats) - Robot Framework end-to-end tests via containerlab - SPDX Apache-2.0 headers on all source files --- .gitignore | 28 + LICENSE | 20 +- Makefile | 157 ++ README.md | 42 + config | 12 + debian/changelog | 12 + debian/control | 38 + debian/copyright | 28 + debian/libnginx-mod-http-ipng-stats.postinst | 42 + debian/libnginx-mod-http-ipng-stats.prerm | 26 + debian/mod-http-ipng-stats.conf | 2 + debian/rules | 39 + debian/source/format | 1 + docs/config-guide.md | 290 +++ docs/design.md | 213 +- docs/user-guide.md | 384 +++ src/ngx_http_ipng_stats_module.c | 2265 ++++++++++++++++++ tests/01-module/01-e2e.robot | 280 +++ tests/01-module/lab/client/start.sh | 23 + tests/01-module/lab/ipng-stats.clab.yml | 54 + tests/01-module/lab/server/nginx.conf | 58 + tests/01-module/lab/server/slow-backend.py | 22 + tests/01-module/lab/server/start.sh | 42 + tests/requirements.txt | 1 + tests/rf-run.sh | 39 + 25 files changed, 4016 insertions(+), 102 deletions(-) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 README.md create mode 100644 config create mode 100644 debian/changelog create mode 100644 debian/control create mode 100644 debian/copyright create mode 100755 debian/libnginx-mod-http-ipng-stats.postinst create mode 100755 debian/libnginx-mod-http-ipng-stats.prerm create mode 100644 debian/mod-http-ipng-stats.conf create mode 100755 debian/rules create mode 100644 debian/source/format create mode 100644 docs/config-guide.md create mode 100644 docs/user-guide.md create mode 100644 src/ngx_http_ipng_stats_module.c create mode 100644 tests/01-module/01-e2e.robot create mode 100644 tests/01-module/lab/client/start.sh create mode 100644 tests/01-module/lab/ipng-stats.clab.yml create mode 100644 tests/01-module/lab/server/nginx.conf create mode 100644 tests/01-module/lab/server/slow-backend.py create mode 100644 tests/01-module/lab/server/start.sh create mode 100644 tests/requirements.txt create mode 100755 tests/rf-run.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7cf02e1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,28 @@ +# Build artifacts +build/ +objs/ +*.so +*.o + +# Debian build outputs (dpkg-buildpackage writes these to the parent dir, +# but if a helper extracts them here, ignore them) +*.deb +*.buildinfo +*.changes +debian/.debhelper/ +debian/debhelper-build-stamp +debian/files +debian/libnginx-mod-http-ipng-stats/ +debian/tmp/ +debian/*.substvars +debian/*.log + +# Test artifacts +tests/.venv/ +tests/out/ + +# Editor/OS cruft +*.swp +*.swo +*~ +.DS_Store diff --git a/LICENSE b/LICENSE index a81d042..4fc01d6 100644 --- a/LICENSE +++ b/LICENSE @@ -162,16 +162,16 @@ other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. - 9. Accepting Warranty or Additional Liability. 
While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. + 9. Accepting Warranty or Support. While redistributing the Work or + Derivative Works thereof, You may choose to offer, and charge a + fee for, acceptance of support, warranty, indemnity, or other + liability obligations and/or rights consistent with this License. + However, in accepting such obligations, You may act only on Your + own behalf and on Your sole responsibility, not on behalf of any + other Contributor, and only if You agree to indemnify, defend, + and hold each Contributor harmless for any liability incurred by, + or claims asserted against, such Contributor by reason of your + accepting any such warranty or support. END OF TERMS AND CONDITIONS diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..04d6a3a --- /dev/null +++ b/Makefile @@ -0,0 +1,157 @@ +# SPDX-License-Identifier: Apache-2.0 +# Makefile for nginx-ipng-stats-plugin +# +# Targets: +# build - build ngx_http_ipng_stats_module.so out-of-tree. +# pkg-deb - build a .deb via dpkg-buildpackage for the current release. +# robot-test - build .deb, then run Robot Framework end-to-end tests +# in containerlab (requires docker + containerlab). +# install-deps - install build and test dependencies via apt. +# clean - remove build artifacts and the fetched nginx source tree. +# help - print this help. +# +# Overridable variables: +# NGINX_SRC - path to an unpacked nginx source tree. If unset, the +# `build` target will apt-source one into ./build/nginx-src. + +MODULE_NAME := ngx_http_ipng_stats_module +MODULE_DIR := $(CURDIR) +BUILD_DIR := $(CURDIR)/build + +NGINX_SRC ?= + +.PHONY: help build pkg-deb robot-test install-deps clean fetch-nginx-src + +TEST ?= tests/ + +help: + @echo "nginx-ipng-stats-plugin — make targets" + @echo "" + @echo " make build Build $(MODULE_NAME).so out-of-tree." + @echo " make pkg-deb Build a Debian package via dpkg-buildpackage." + @echo " make robot-test Build .deb, then run Robot Framework e2e tests." + @echo " make install-deps Install build and test dependencies (apt)." + @echo " make clean Remove build artifacts." + @echo "" + @echo "Overridable:" + @echo " NGINX_SRC= Use an existing nginx source tree." + @echo " TEST= Run a specific .robot file (default: tests/)." 
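+
+# Example invocations (the nginx source path below is purely illustrative;
+# point NGINX_SRC at whatever unpacked nginx tree you actually have):
+#
+#   make build                                     # fetch nginx source, build the .so
+#   make build NGINX_SRC=$HOME/src/nginx-1.26.x    # reuse an existing source tree
+#   make pkg-deb
+#   make robot-test TEST=tests/01-module/01-e2e.robot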
+ +# ---------------------------------------------------------------------- +# build: out-of-tree dynamic module build +# ---------------------------------------------------------------------- + +build: $(BUILD_DIR)/$(MODULE_NAME).so + @echo "" + @echo "Built: $(BUILD_DIR)/$(MODULE_NAME).so" + @echo "" + @echo "To try it locally without installing a .deb:" + @echo " sudo install -m 0644 $(BUILD_DIR)/$(MODULE_NAME).so /usr/lib/nginx/modules/" + @echo " echo 'load_module modules/$(MODULE_NAME).so;' | sudo tee /etc/nginx/modules-enabled/50-mod-http-ipng-stats.conf" + @echo " sudo nginx -t && sudo nginx -s reload" + +$(BUILD_DIR)/$(MODULE_NAME).so: fetch-nginx-src + @set -e; \ + if [ -z "$(NGINX_SRC)" ]; then \ + NGX_SRC="$(BUILD_DIR)/nginx-src"; \ + else \ + NGX_SRC="$(NGINX_SRC)"; \ + fi; \ + echo "Configuring nginx in $$NGX_SRC against module at $(MODULE_DIR)"; \ + cd "$$NGX_SRC" && ./configure --with-compat --add-dynamic-module=$(MODULE_DIR); \ + echo "Building module"; \ + $(MAKE) -C "$$NGX_SRC" -f objs/Makefile modules; \ + mkdir -p $(BUILD_DIR); \ + cp "$$NGX_SRC/objs/$(MODULE_NAME).so" $(BUILD_DIR)/$(MODULE_NAME).so + +fetch-nginx-src: + @set -e; \ + if [ -n "$(NGINX_SRC)" ]; then \ + echo "Using NGINX_SRC=$(NGINX_SRC)"; \ + exit 0; \ + fi; \ + if [ -d "$(BUILD_DIR)/nginx-src" ] && [ -f "$(BUILD_DIR)/nginx-src/configure" ]; then \ + echo "Reusing $(BUILD_DIR)/nginx-src"; \ + exit 0; \ + fi; \ + mkdir -p $(BUILD_DIR); \ + if [ -d /usr/share/nginx/src ] && [ -f /usr/share/nginx/src/configure ]; then \ + echo "Copying /usr/share/nginx/src (from nginx-dev) to $(BUILD_DIR)/nginx-src"; \ + rm -rf $(BUILD_DIR)/nginx-src; \ + cp -a /usr/share/nginx/src $(BUILD_DIR)/nginx-src; \ + chmod -R u+w $(BUILD_DIR)/nginx-src; \ + exit 0; \ + fi; \ + rm -rf $(BUILD_DIR)/apt-src; \ + mkdir -p $(BUILD_DIR)/apt-src; \ + echo "Fetching nginx source via \`apt source nginx\` in $(BUILD_DIR)/apt-src"; \ + cd $(BUILD_DIR)/apt-src && apt source nginx; \ + NGX_DIR=$$(find $(BUILD_DIR)/apt-src -maxdepth 1 -type d -name 'nginx-*' | head -n1); \ + if [ -z "$$NGX_DIR" ]; then \ + echo "error: could not find unpacked nginx source tree under $(BUILD_DIR)/apt-src" >&2; \ + exit 1; \ + fi; \ + rm -rf $(BUILD_DIR)/nginx-src; \ + mv "$$NGX_DIR" $(BUILD_DIR)/nginx-src; \ + rm -rf $(BUILD_DIR)/apt-src + +# ---------------------------------------------------------------------- +# pkg-deb: build a .deb +# ---------------------------------------------------------------------- + +pkg-deb: + dpkg-buildpackage -us -uc -b + @mkdir -p $(BUILD_DIR) + @# dpkg-buildpackage writes artifacts to ../ — relocate them into + @# $(BUILD_DIR) so everything ephemeral lives under build/. + @for f in ../libnginx-mod-http-ipng-stats*.deb \ + ../libnginx-mod-http-ipng-stats*.ddeb \ + ../nginx-ipng-stats-plugin_*.buildinfo \ + ../nginx-ipng-stats-plugin_*.changes; do \ + if [ -f "$$f" ]; then mv -f "$$f" $(BUILD_DIR)/; fi; \ + done + @echo "" + @echo "Resulting .deb(s):" + @ls -1 $(BUILD_DIR)/*.deb 2>/dev/null || true + +# ---------------------------------------------------------------------- +# clean +# ---------------------------------------------------------------------- + +# ---------------------------------------------------------------------- +# robot-test: containerlab + Robot Framework end-to-end tests +# ---------------------------------------------------------------------- + +tests/.venv: tests/requirements.txt + python3 -m venv tests/.venv + tests/.venv/bin/pip install -q -r tests/requirements.txt + +robot-test: tests/.venv + @if [ ! 
-f $(BUILD_DIR)/libnginx-mod-http-ipng-stats_*.deb ]; then \ + echo "error: no .deb found in $(BUILD_DIR)/. Run 'make pkg-deb' first." >&2; \ + exit 1; \ + fi + tests/rf-run.sh docker $(TEST) + +# ---------------------------------------------------------------------- +# install-deps: install build and test dependencies +# ---------------------------------------------------------------------- + +install-deps: + sudo apt-get update -qq + sudo apt-get install -y \ + nginx-dev dpkg-dev debhelper \ + python3 python3-venv \ + curl + @echo "" + @echo "Build dependencies installed. For 'make robot-test' you also need:" + @echo " - docker: https://docs.docker.com/engine/install/debian/" + @echo " - containerlab: https://containerlab.dev/install/" + +# ---------------------------------------------------------------------- +# clean +# ---------------------------------------------------------------------- + +clean: + rm -rf $(BUILD_DIR) tests/.venv tests/out + -dh_clean 2>/dev/null || true diff --git a/README.md b/README.md new file mode 100644 index 0000000..44cdea8 --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ + +# nginx-ipng-stats-plugin + +Per-VIP, per-device traffic counters for nginx. Ships as a dynamic nginx module and a Debian package that loads into stock upstream +nginx on Debian Trixie. + +The module attributes every HTTP request to the interface it arrived on, using Linux `SO_BINDTODEVICE` on per-interface listening +sockets. Counters — requests, status codes, bytes, latency histograms — are exposed as Prometheus text or JSON from a single HTTP +scrape endpoint, filtered per-source. This is useful for any deployment where traffic arrives on distinct interfaces — GRE tunnels, +VLANs, bonded links, or plain ethernet — and per-interface observability is needed. + +Without any `device=`/`ipng_source_tag=` parameters, the module still counts and exposes per-VIP traffic under the configurable +default source tag (`direct`), which makes it a useful plain observability module for any nginx host. + +See [`docs/design.md`](docs/design.md) for the full design, including the attribution model, data flow, and requirements. + +## Quick start + +``` +make install-deps # install build and test dependencies (apt) +make build # build the .so out-of-tree +make pkg-deb # build a .deb package +make robot-test # run end-to-end tests via containerlab +``` + +## Installing + +``` +sudo dpkg -i build/*.deb +``` + +The package installs the `.so` into `/usr/lib/nginx/modules`, drops a `load_module` stanza into `/etc/nginx/modules-enabled/`, and runs +`nginx -t` before completing. + +## Configuring + +See [`docs/user-guide.md`](docs/user-guide.md) for an end-to-end walkthrough and [`docs/config-guide.md`](docs/config-guide.md) for the +directive and `listen` parameter reference. + +## License + +Apache-2.0. See [`LICENSE`](LICENSE). diff --git a/config b/config new file mode 100644 index 0000000..16c8e14 --- /dev/null +++ b/config @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: Apache-2.0 +ngx_addon_name=ngx_http_ipng_stats_module + +if test -n "$ngx_module_link"; then + ngx_module_type=HTTP + ngx_module_name=ngx_http_ipng_stats_module + ngx_module_srcs="$ngx_addon_dir/src/ngx_http_ipng_stats_module.c" + . 
auto/module +else + HTTP_MODULES="$HTTP_MODULES ngx_http_ipng_stats_module" + NGX_ADDON_SRCS="$NGX_ADDON_SRCS $ngx_addon_dir/src/ngx_http_ipng_stats_module.c" +fi diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..aa8d99d --- /dev/null +++ b/debian/changelog @@ -0,0 +1,12 @@ +nginx-ipng-stats-plugin (0.1.0-1) unstable; urgency=medium + + * Initial release. + - ngx_http_ipng_stats_module: per-VIP, per-device HTTP traffic + counters, attributed via SO_BINDTODEVICE on per-interface + listening sockets, exposed as Prometheus text and JSON from a + single scrape endpoint. + - Debian package libnginx-mod-http-ipng-stats builds against the + target release's nginx-dev headers with --with-compat and loads + into stock upstream nginx without recompiling nginx itself. + + -- Pim van Pelt Thu, 16 Apr 2026 00:00:00 +0000 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..f00412b --- /dev/null +++ b/debian/control @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: Apache-2.0 +Source: nginx-ipng-stats-plugin +Section: httpd +Priority: optional +Maintainer: Pim van Pelt +Build-Depends: + debhelper-compat (= 13), + nginx-dev, + libpcre2-dev, + zlib1g-dev, + libssl-dev +Standards-Version: 4.6.2 +Homepage: https://git.ipng.ch/ipng/nginx-ipng-stats-plugin +Rules-Requires-Root: no + +Package: libnginx-mod-http-ipng-stats +Architecture: any +Depends: + ${misc:Depends}, + ${shlibs:Depends}, + nginx (>= 1.26~) +Description: nginx dynamic module for per-VIP, per-device traffic counters + This package ships ngx_http_ipng_stats_module as a dynamic module + loadable into stock upstream nginx on Debian. The module records + per-VIP HTTP request counters, status code distribution, bytes in + and out, and request-duration histograms, and attributes each + request to the interface it arrived on. Counters are exposed as + Prometheus text and JSON from a single scrape endpoint. + . + Attribution is done by the Linux kernel's TCP socket lookup, using + SO_BINDTODEVICE on per-interface listening sockets. The module adds + device= and ipng_source_tag= parameters to the nginx listen + directive; the kernel routes each incoming connection to the + correct listener by ingress interface. + . + Typical use cases include GRE tunnel fleets, VLAN trunks, or any + deployment where traffic arrives on distinct interfaces and + per-interface observability is needed. diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 0000000..28d4f4d --- /dev/null +++ b/debian/copyright @@ -0,0 +1,28 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: nginx-ipng-stats-plugin +Upstream-Contact: Pim van Pelt +Source: https://git.ipng.ch/ipng/nginx-ipng-stats-plugin + +Files: * +Copyright: 2026 Pim van Pelt +License: Apache-2.0 + +Files: debian/* +Copyright: 2026 Pim van Pelt +License: Apache-2.0 + +License: Apache-2.0 + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + . + http://www.apache.org/licenses/LICENSE-2.0 + . + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + . 
+ On Debian systems, the full text of the Apache License version 2.0 + can be found in `/usr/share/common-licenses/Apache-2.0'. diff --git a/debian/libnginx-mod-http-ipng-stats.postinst b/debian/libnginx-mod-http-ipng-stats.postinst new file mode 100755 index 0000000..a59b6b0 --- /dev/null +++ b/debian/libnginx-mod-http-ipng-stats.postinst @@ -0,0 +1,42 @@ +#!/bin/sh +# SPDX-License-Identifier: Apache-2.0 +# postinst for libnginx-mod-http-ipng-stats +set -e + +AVAIL=/etc/nginx/modules-available/50-mod-http-ipng-stats.conf +ENABLED=/etc/nginx/modules-enabled/50-mod-http-ipng-stats.conf + +case "$1" in + configure) + # Enable the module by symlinking it into modules-enabled. + if [ ! -L "$ENABLED" ] && [ -f "$AVAIL" ]; then + ln -s "$AVAIL" "$ENABLED" + fi + + # Sanity-check the resulting config. If nginx -t fails, back + # out the symlink so the operator isn't left with an nginx + # that cannot start. + if ! nginx -t > /dev/null 2>&1; then + echo "warning: nginx -t failed after enabling" \ + "libnginx-mod-http-ipng-stats; disabling the module." >&2 + rm -f "$ENABLED" + nginx -t >&2 || true + echo "warning: nginx-ipng-stats-plugin has been installed" \ + "but is NOT enabled; fix the configuration error" \ + "and re-enable with:" >&2 + echo " sudo ln -s $AVAIL $ENABLED && sudo nginx -s reload" >&2 + fi + ;; + + abort-upgrade|abort-remove|abort-deconfigure) + ;; + + *) + echo "postinst called with unknown argument \`$1'" >&2 + exit 1 + ;; +esac + +#DEBHELPER# + +exit 0 diff --git a/debian/libnginx-mod-http-ipng-stats.prerm b/debian/libnginx-mod-http-ipng-stats.prerm new file mode 100755 index 0000000..b6bea43 --- /dev/null +++ b/debian/libnginx-mod-http-ipng-stats.prerm @@ -0,0 +1,26 @@ +#!/bin/sh +# SPDX-License-Identifier: Apache-2.0 +# prerm for libnginx-mod-http-ipng-stats +set -e + +ENABLED=/etc/nginx/modules-enabled/50-mod-http-ipng-stats.conf + +case "$1" in + remove|upgrade|deconfigure) + if [ -L "$ENABLED" ]; then + rm -f "$ENABLED" + fi + ;; + + failed-upgrade) + ;; + + *) + echo "prerm called with unknown argument \`$1'" >&2 + exit 1 + ;; +esac + +#DEBHELPER# + +exit 0 diff --git a/debian/mod-http-ipng-stats.conf b/debian/mod-http-ipng-stats.conf new file mode 100644 index 0000000..6be662d --- /dev/null +++ b/debian/mod-http-ipng-stats.conf @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: Apache-2.0 +load_module modules/ngx_http_ipng_stats_module.so; diff --git a/debian/rules b/debian/rules new file mode 100755 index 0000000..b323de1 --- /dev/null +++ b/debian/rules @@ -0,0 +1,39 @@ +#!/usr/bin/make -f +# SPDX-License-Identifier: Apache-2.0 +# debian/rules for nginx-ipng-stats-plugin +# +# The actual module build is driven by the top-level Makefile, which +# copies /usr/share/nginx/src (from nginx-dev) into a writable +# build/nginx-src/ and runs the out-of-tree --add-dynamic-module dance +# against it. debian/rules just delegates to `make build` and installs +# the resulting .so into the package tree. + +export DH_VERBOSE = 1 +export DEB_BUILD_MAINT_OPTIONS = hardening=+all + +MODULE_NAME := ngx_http_ipng_stats_module +PKG := libnginx-mod-http-ipng-stats + +%: + dh $@ + +override_dh_auto_configure: + # No-op: configure happens inside `make build`. 
+ +override_dh_auto_build: + $(MAKE) build + +override_dh_auto_install: + install -D -m 0644 \ + $(CURDIR)/build/$(MODULE_NAME).so \ + $(CURDIR)/debian/$(PKG)/usr/lib/nginx/modules/$(MODULE_NAME).so + install -D -m 0644 \ + $(CURDIR)/debian/mod-http-ipng-stats.conf \ + $(CURDIR)/debian/$(PKG)/etc/nginx/modules-available/50-mod-http-ipng-stats.conf + +override_dh_auto_clean: + # Preserve build/ across clean so dpkg-buildpackage doesn't force + # a full rebuild of the fetched nginx source tree every time. The + # top-level `make clean` will flush it when the user really wants + # a fresh start. + dh_auto_clean diff --git a/debian/source/format b/debian/source/format new file mode 100644 index 0000000..89ae9db --- /dev/null +++ b/debian/source/format @@ -0,0 +1 @@ +3.0 (native) diff --git a/docs/config-guide.md b/docs/config-guide.md new file mode 100644 index 0000000..0245327 --- /dev/null +++ b/docs/config-guide.md @@ -0,0 +1,290 @@ + +# nginx-ipng-stats-plugin — Configuration Reference + +This document enumerates every directive and `listen` parameter introduced by `ngx_http_ipng_stats_module`, the nginx contexts in which +each is legal, the allowed values, and the default (NFR-7.2). For an end-to-end walkthrough read [`user-guide.md`](user-guide.md); for +the reasoning behind the design read [`design.md`](design.md). + +## `listen` parameters + +These extend the stock nginx `listen` directive. They are parsed by the module and stripped from `cf->args` before the original handler +is invoked, so they compose with every standard `listen` parameter (`ssl`, `http2`, `default_server`, `reuseport`, etc.). + +### `device=` + +**Context:** `listen` directive (wherever `listen` itself is legal — typically inside `server { ... }`). + +**Value:** a Linux interface name, e.g. `gre-mg1`, `eth0`. Maximum `IFNAMSIZ - 1` characters (15 on current kernels). + +**Default:** not set (plain listen). + +**Effect:** the resulting listening socket has `SO_BINDTODEVICE` applied at init-module time, making the kernel accept only connections +whose ingress interface is ``. Combined with a wildcard listen address (`80`, `[::]:80`) this is the mechanism by which the +plugin attributes traffic to a specific ingress interface. + +The `setsockopt(SO_BINDTODEVICE)` call runs in the nginx master process while it still holds its initial privileges — workers never +call it, and no additional Linux capability is required beyond what stock nginx already has (NFR-6.1). + +See FR-1.1, FR-1.5, FR-1.6. + +### `ipng_source_tag=` + +**Context:** `listen` directive. + +**Value:** a short opaque string identifying the traffic source. No length limit is enforced, but keep it ≤ 32 characters +for readable metric output. + +**Default:** when `ipng_source_tag=` is absent but `device=X` is set, the tag defaults to the interface name `X` (FR-1.4). When both +are absent, the tag defaults to the value of `ipng_stats_default_source` at the enclosing `http` level. + +**Effect:** every counter recorded on this listener carries `source_tag=` as a Prometheus label and as the outer key in the JSON +output. Scrape consumers can use this tag to filter the response to only the traffic they delivered. To obtain the VIP address in +nginx config (e.g. in `log_format` or `map`), use nginx's built-in `$server_addr` variable. + +See FR-1.2, FR-1.3, FR-1.4. + +## `http`-level directives + +All plugin-wide settings live in the `http { ... }` block. They cannot be overridden in inner contexts. + +### `ipng_stats_zone :` + +**Context:** `http`. 
+ +**Value:** `` is a string identifier for the shared-memory zone; `` is an nginx size spec with `k` or `m` suffix. + +**Default:** none — the directive is mandatory if the module is loaded. + +**Effect:** allocates a shared-memory zone of `` bytes to hold the counter hash table. The `` must be stable across +`nginx -s reload` — renaming it forces a fresh segment, which is the one situation where counters reset without a master restart. + +**Sizing guidance:** the dominant factor in zone size is `~60 keys per (source, vip)` (one per observed status code). A host serving +50 VIPs behind 4 source interfaces uses `4 × 50 × 60 ≈ 12000` keys, each a few hundred bytes. The default-sized `4m` zone comfortably fits that. +If the zone fills, the module drops new keys and increments `nginx_ipng_zone_full_events_total` — resize and reload. + +See FR-5.1, NFR-3.1. + +### `ipng_stats_flush_interval ` + +**Context:** `http`. + +**Value:** an nginx duration string, e.g. `500ms`, `1s`, `2s`. + +**Default:** `1s`. + +**Minimum:** `100ms`. + +**Effect:** sets the cadence of the per-worker flush timer that moves private counter deltas into the shared-memory zone. Lower values +reduce the window of data loss if a worker crashes; higher values reduce the number of atomic adds on the shared zone. The default +is sized so that a scrape interval of 5–15 s sees effectively no lag. + +See FR-4.2, FR-5.2. + +### `ipng_stats_default_source ` + +**Context:** `http`. + +**Value:** a short string; see `ipng_source_tag=` above for conventions. + +**Default:** `direct`. + +**Effect:** sets the tag applied to listening sockets that have neither `device=` nor `ipng_source_tag=`. A host serving a mix of device-attributed +and direct web traffic will see direct traffic under this tag in the scrape output. Rename it to `public`, `localnet`, or anything else +that reads better for your deployment. + +See FR-1.3, FR-5.3. + +### `ipng_stats_buckets ...` + +**Context:** `http`. + +**Value:** two or more positive integers, strictly increasing, representing histogram bucket upper bounds in milliseconds. + +**Default:** `1 5 10 25 50 100 250 500 1000 2500 5000 10000`, plus an implicit `+Inf` bucket. + +**Effect:** overrides the default histogram bucket boundaries for both `request_duration` and `upstream_response_time` histograms. The +same set applies to every `(source, vip)` key in the module (v0.1 does not support per-key override; see +[`design.md`](design.md#decisions-deferred-post-v01)). + +See FR-2.3, FR-5.4. + +### `ipng_stats on | off` + +**Context:** `http`, `server`, `location`. + +**Value:** boolean (`on` or `off`). + +**Default:** `on` at the `http` level when the module is loaded. + +**Effect:** opts a context into or out of counting. Cost of a disabled context is one branch in the log-phase handler. A location +serving the `ipng_stats` scrape handler is automatically excluded from counting regardless of this directive — scraping the scrape +endpoint does not inflate its own counters. + +See FR-5.5. + +### `ipng_stats_logtail udp://: [buffer=] [flush=]` + +**Context:** `http`. + +**Value:** `` is the name of an existing `log_format` defined earlier in the same `http` block. The destination MUST be a +`udp://host:port` URI. `buffer=` is an optional nginx size spec (default `64k`, minimum `1k`). `flush=` is an optional +nginx duration string (default `1s`, minimum `100ms`). + +**Default:** not set — the directive is optional. When absent, no global logtail output is written. 
+ +**Effect:** registers a global log-phase writer that fires unconditionally for every request, regardless of `server` or `location` +context. The named `log_format` is looked up from nginx's log module at configuration time; nginx's standard variable-expansion +machinery renders each line, so any variable usable in a regular `log_format` — including `$ipng_source_tag` and `$server_addr` — is +available here. + +Each worker maintains a private in-memory write buffer of `buffer=` bytes. Each buffer flush is transmitted as a single +`sendto()` call on a per-worker `SOCK_DGRAM` socket that is opened at worker init and closed at worker exit. The address is resolved +once at configuration time — there is no DNS lookup at flush time. The buffer is flushed when: + +- the buffer is full (immediate flush, no lines are dropped); +- the `flush=` timer fires (periodic flush); or +- the worker exits during a graceful reload or shutdown (final flush). + +This covers all request traffic with a single directive at the `http` level, eliminating the need to repeat `access_log` in every +`server` block. It is particularly useful when the format includes `$ipng_source_tag` and `$server_addr`, giving per-device attribution +in every log line at no extra configuration cost. + +File-based access logging is intentionally not supported by this directive — use nginx's built-in `access_log` directive for that. + +```nginx +log_format logtail '$host\t$remote_addr\t$ipng_source_tag\t$server_addr\t' + '$request_method\t$request_uri\t$status\t$body_bytes_sent\t' + '$request_time'; +ipng_stats_logtail logtail udp://127.0.0.1:9514 buffer=16k flush=1s; +``` + +**Constraints and behavior:** + +- `host` MUST be a literal IPv4 address. Hostnames and IPv6 addresses are not supported in v0.1. +- Each flush emits a single UDP datagram. At the default `buffer=64k` size, datagram payloads comfortably fit within the ~64 KB + loopback MTU. Operators using very large buffers on non-loopback paths should be aware of path MTU limits. +- If no receiver is listening, the kernel silently discards the datagram. The worker receives no error and is not blocked. This is + intentional: the logtail is a fire-and-forget analytics transport — zero disk I/O and no backpressure are the point. +- There is no acknowledgment, no retry, and no sequence number. Datagrams lost in transit or because the receiver is down are + permanently lost. + +**Receiver side:** any UDP server works. Two minimal examples: + +```bash +# Quick inspection with netcat: +nc -u -l 127.0.0.1 9514 + +# Production Go receiver snippet: +conn, _ := net.ListenPacket("udp", ":9514") +buf := make([]byte, 65536) +for { + n, _, _ := conn.ReadFrom(buf) + process(buf[:n]) +} +``` + +See FR-8.1, FR-8.2, FR-8.3, FR-8.4. + +### `ipng_stats;` (scrape handler) + +**Context:** `location`. + +**Value:** no argument. Placed on its own line inside a `location` block. + +**Default:** not set. + +**Effect:** turns the enclosing location into the module's scrape handler. No other content handler (`proxy_pass`, `root`, `return`, +`fastcgi_pass`, ...) may be combined with `ipng_stats;` in the same location. The handler honors: + +- `Accept:` header — `application/json` for JSON, anything else for Prometheus text. +- `?source_tag=` — filter output to only counters whose `source_tag` dimension equals the tag. Exact match, case-sensitive. +- `?vip=
` — filter output to only counters whose `vip` dimension equals the canonicalized address. + +Filters MAY be combined; their effect is the intersection. + +**Security:** the module does not ship authentication. Place an `allow`/`deny` ACL in the same `location` block (or its enclosing +`server`) to control access (NFR-6.2). + +See FR-3.1, FR-3.2, FR-3.3, FR-3.4, FR-3.5. + +## Metric names + +For Prometheus, the module exports under the `nginx_ipng_` prefix. + +| metric | type | labels | meaning | +| --- | --- | --- | --- | +| `nginx_ipng_requests_total` | counter | `source_tag`, `vip`, `code` | Request count per `(source, vip, status_code)`. | +| `nginx_ipng_bytes_in_total` | counter | `source_tag`, `vip`, `code` | Request bytes received (request line + headers + body). | +| `nginx_ipng_bytes_out_total` | counter | `source_tag`, `vip`, `code` | Response bytes sent (status line + headers + body). | +| `nginx_ipng_request_duration_seconds_bucket` | histogram bucket | `source_tag`, `vip`, `le` | Request duration histogram (Prometheus shape). | +| `nginx_ipng_request_duration_seconds_sum` | histogram sum | `source_tag`, `vip` | Sum of observed durations in seconds. | +| `nginx_ipng_request_duration_seconds_count` | histogram count | `source_tag`, `vip` | Count of observations. | +| `nginx_ipng_upstream_response_seconds_bucket` | histogram bucket | `source_tag`, `vip`, `le` | Upstream response time histogram. | +| `nginx_ipng_upstream_response_seconds_sum` | histogram sum | `source_tag`, `vip` | | +| `nginx_ipng_upstream_response_seconds_count` | histogram count | `source_tag`, `vip` | | +| `nginx_ipng_rate_1s` | gauge | `source_tag`, `vip` | EWMA requests/sec, 1-second decay. | +| `nginx_ipng_rate_10s` | gauge | `source_tag`, `vip` | EWMA requests/sec, 10-second decay. | +| `nginx_ipng_rate_60s` | gauge | `source_tag`, `vip` | EWMA requests/sec, 60-second decay. | +| `nginx_ipng_zone_bytes_used` | gauge | — | Shared-memory zone bytes currently allocated. | +| `nginx_ipng_zone_bytes_total` | gauge | — | Shared-memory zone capacity in bytes. | +| `nginx_ipng_zone_full_events_total` | counter | — | Number of key insertions dropped because the zone was full. | +| `nginx_ipng_flushes_total` | counter | `worker` | Number of per-worker flush ticks executed. | +| `nginx_ipng_flush_duration_seconds` | histogram | `worker` | Histogram of flush durations. | +| `nginx_ipng_scrape_duration_seconds` | histogram | — | Histogram of scrape handler runtimes. | + +See FR-2.*, FR-3.7. + +## JSON output shape + +```json +{ + "schema": 1, + "by_source": { + "mg1": { + "vips": { + "192.0.2.10": { + "rate_1s": 42.3, + "rate_10s": 40.1, + "rate_60s": 39.8, + "codes": { + "200": { "requests": 12345, "bytes_in": 9876543, "bytes_out": 54321098 }, + "404": { "requests": 17, "bytes_in": 2048, "bytes_out": 9216 } + }, + "request_duration_ms": { + "buckets": { "1": 10, "5": 40, "10": 120, "25": 350, "50": 870, "100": 2100, + "250": 3400, "500": 4000, "1000": 4100, "2500": 4120, + "5000": 4123, "10000": 4124, "+Inf": 4124 }, + "sum_ms": 87654, + "count": 4124 + }, + "upstream_response_ms": { "...": "..." } + } + } + } + }, + "meta": { + "zone_bytes_used": 131072, + "zone_bytes_total": 4194304, + "zone_full_events": 0 + } +} +``` + +The top-level `schema` field is versioned — breaking changes bump it, additive changes don't. Consumers SHOULD check `schema` +before parsing. + +See FR-3.6. 
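+
+A minimal consumer sketch in Python, assuming the scrape location and port from the deployment example in
+[`design.md`](design.md) (`127.0.0.1:9113`, `/.well-known/ipng/statsz`); adjust both to wherever your `ipng_stats;` location lives.
+It negotiates JSON via the `Accept` header, checks the `schema` field, and lets the server apply the `?source_tag=` filter:
+
+```python
+import json
+import urllib.request
+
+# Example endpoint; substitute your own host, port, and scrape location.
+URL = "http://127.0.0.1:9113/.well-known/ipng/statsz?source_tag=mg1"
+
+req = urllib.request.Request(URL, headers={"Accept": "application/json"})
+with urllib.request.urlopen(req) as resp:
+    doc = json.load(resp)
+
+if doc.get("schema") != 1:
+    raise RuntimeError(f"unexpected schema version: {doc.get('schema')}")
+
+# Sum request counts per VIP across all status codes for the filtered source tag.
+for source, data in doc["by_source"].items():
+    for vip, stats in data["vips"].items():
+        total = sum(code["requests"] for code in stats["codes"].values())
+        print(f"{source} {vip}: {total} requests, rate_60s={stats['rate_60s']}")
+```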
+ +## Context summary + +| knob | `http` | `server` | `location` | `listen` | +| --- | --- | --- | --- | --- | +| `ipng_stats_zone` | ✅ | — | — | — | +| `ipng_stats_flush_interval` | ✅ | — | — | — | +| `ipng_stats_default_source` | ✅ | — | — | — | +| `ipng_stats_buckets` | ✅ | — | — | — | +| `ipng_stats_logtail` | ✅ | — | — | — | +| `ipng_stats on\|off` | ✅ | ✅ | ✅ | — | +| `ipng_stats;` (handler) | — | — | ✅ | — | +| `device=` | — | — | — | ✅ | +| `ipng_source_tag=` | — | — | — | ✅ | diff --git a/docs/design.md b/docs/design.md index cb8008e..c3b5615 100644 --- a/docs/design.md +++ b/docs/design.md @@ -1,4 +1,5 @@ -# nginx-vpp-maglev-plugin Design Document + +# nginx-ipng-stats-plugin Design Document ## Metadata @@ -7,7 +8,7 @@ | **Status** | Draft — describes intended behavior for `v0.1.0` | | **Author** | Pim van Pelt `` | | **Last updated** | 2026-04-16 | -| **Audience** | Operators and contributors building the nginx-side observability half of `vpp-maglev` | +| **Audience** | Operators and contributors deploying per-device, per-VIP traffic observability on nginx | The key words **MUST**, **MUST NOT**, **SHOULD**, **SHOULD NOT**, and **MAY** are used as described in [RFC 2119](https://datatracker.ietf.org/doc/html/rfc2119), and are reserved in this document for requirements that are intended to be @@ -16,60 +17,52 @@ lowercase — "can", "will", "does" — and should not be read as normative. ## Summary -`nginx-vpp-maglev-plugin` is a dynamic nginx module and its surrounding Debian packaging. Loaded into stock upstream nginx, the module records -per-VIP traffic counters — requests, status codes, bytes, latency — and attributes them to the specific `vpp-maglev` instance whose GRE -tunnel delivered each connection. A small HTTP scrape endpoint exposes the counters as both Prometheus text and JSON so that -`maglevd-frontend`, Prometheus, and ad-hoc `curl` sessions can all read the same data. The module is the nginx-side answer to the open -question in [`vpp-maglev/docs/design.md`](../../vpp-maglev/docs/design.md) about per-backend traffic counters: VPP's `lb` plugin bypasses -the FIB and cannot produce them, so the backends report what they see. +`nginx-ipng-stats-plugin` is a dynamic nginx module and its surrounding Debian packaging. Loaded into stock upstream nginx, the module +records per-VIP traffic counters — requests, status codes, bytes, latency — and attributes them to the specific interface on which each +connection arrived. A small HTTP scrape endpoint exposes the counters as both Prometheus text and JSON so that Prometheus, custom +dashboards, and ad-hoc `curl` sessions can all read the same data. ## Background -`vpp-maglev` programs VPP's `lb` plugin so that traffic hashed to a VIP lands on a pool of healthy Application Servers (ASes). For the -deployment this module targets, every AS is an nginx instance receiving GRE-encapsulated traffic from one or more `maglevd` daemons, -decapsulating it, and terminating or proxying HTTP and HTTPS as it would for any other inbound client. +Any deployment where traffic arrives on distinct Linux interfaces — GRE tunnels, VLANs, VXLANs, bonded links, or plain ethernet — can +benefit from per-interface traffic visibility. The nginx instances that serve the traffic already observe everything an operator wants to +see — they are the authoritative source for request rate, response code mix, bytes moved, and latency distributions. A small in-process +module emits those numbers on an HTTP endpoint, and consumers scrape the data filtered by source tag. 
-The design document for `vpp-maglev` identifies **per-AS traffic counters** as an explicit open question: VPP's `lb` fast path bypasses -the FIB, so VPP exposes per-VIP counters in the stats segment but not per-backend ones. An operator looking at the `maglevd-frontend` -status page for a frontend with four backends can see the frontend's aggregate packet rate but not which backend is carrying how much of -it, which errors are concentrated on which backend, or whether one backend's p95 latency is drifting. - -This project closes that gap from the opposite end. The nginx instances that serve the traffic already observe everything an operator -wants to see — they are the authoritative source for request rate, response code mix, bytes moved, and latency distributions. A small -in-process module emits those numbers on an HTTP endpoint, and `maglevd-frontend` fans out to the backends of each frontend and aggregates -the result into the existing status page. +One motivating use case is [`vpp-maglev`](https://git.ipng.ch/ipng/vpp-maglev), where each load-balancer instance terminates a GRE +tunnel on the nginx host. The module attributes traffic per tunnel, letting the frontend show per-backend counters that VPP's fast path +cannot provide. But the module is not coupled to that use case — it works with any interface type and any consumer. ## Goals and Non-Goals ### Product Goals -1. **Per-VIP, per-maglev traffic visibility.** For each VIP, the module records request count, status-code distribution, bytes in and out, - and request-duration histograms, split by which `maglevd` instance delivered the traffic. +1. **Per-VIP, per-device traffic visibility.** For each VIP, the module records request count, status-code distribution, bytes in and + out, and request-duration histograms, split by which interface delivered the traffic. 2. **Negligible hot-path cost.** At steady state, a request traversing an nginx worker with the module loaded pays at most a handful of non-atomic integer increments and a histogram bucket update. No locks, no allocations, no system calls. 3. **Two readers, one endpoint.** A single HTTP location serves both Prometheus text and JSON, so a site running Prometheus and a site - using only the `maglevd-frontend` UI can both consume the module without extra configuration. + using a custom consumer can both consume the module without extra configuration. 4. **Packaging as a dynamic module.** The module builds with nginx's `--with-compat` ABI and ships as a Debian package that loads into stock upstream nginx without recompiling nginx itself. -5. **Composable with normal nginx use.** A host running the module as a maglev backend **and** serving unrelated direct web traffic on the - same ports MUST remain a correct nginx deployment. The module MUST NOT change the semantics of any existing directive; it only adds new - parameters and directives that are no-ops when unused. +5. **Composable with normal nginx use.** A host running the module with device-bound listeners **and** serving unrelated direct web + traffic on the same ports MUST remain a correct nginx deployment. The module MUST NOT change the semantics of any existing directive; + it only adds new parameters and directives that are no-ops when unused. 6. **Graceful reload.** An `nginx -s reload` MUST NOT reset counters, lose history, or drop in-flight connections from the module's point of view. ### Non-Goals - The module is **not** a generic nginx metrics exporter. 
It does not aim to replace `nginx-module-vts`, `ngx_http_stub_status`, or - `nginx-lua-prometheus`. Its metric set is deliberately narrow and shaped by the `maglevd-frontend` status page. + `nginx-lua-prometheus`. Its metric set is deliberately narrow: per-VIP, per-device counters, histograms, and rate gauges. - The module does **not** terminate TLS, rewrite headers, or alter the request in any way. It is observation-only. -- The module does **not** talk to `maglevd` directly. It does not initiate gRPC, it does not read maglev configuration, and it does not - know which maglev instance owns which VIP. The attribution tag it emits is a string supplied by the operator in the `listen` directive; - nothing more. +- The module does **not** talk to any external daemon. It does not initiate gRPC or read any external configuration. The attribution tag + it emits is a string supplied by the operator in the `listen` directive; nothing more. - The module does **not** provide per-client-IP, per-path, or per-User-Agent counters. Those dimensions explode cardinality and belong in access logs and existing log-analysis tools. - The module does **not** provide persistent storage. Counters live in shared memory for the lifetime of the nginx master process; on restart they start at zero. Consumers who need historical retention SHOULD scrape it from Prometheus. -- The module does **not** own the GRE tunnels, the VIP addresses, or the `SO_BINDTODEVICE` privilege. Tunnel creation, VIP binding, and +- The module does **not** own the interfaces, the VIP addresses, or the `SO_BINDTODEVICE` privilege. Interface creation, VIP binding, and nginx master privileges are the operator's responsibility. ## Requirements @@ -83,11 +76,11 @@ Each requirement carries a unique identifier (`FR-X.Y` or `NFR-X.Y`) so that lat - **FR-1.1** The module MUST support a new parameter on the nginx `listen` directive, `device=`, which causes the resulting listening socket to be created with `SO_BINDTODEVICE` set to the named interface. A listen directive without `device=` MUST create a plain listening socket as stock nginx does. -- **FR-1.2** The module MUST support a new parameter on the nginx `listen` directive, `source=`, which attaches a short string tag to +- **FR-1.2** The module MUST support a new parameter on the nginx `listen` directive, `ipng_source_tag=`, which attaches a short string tag to the listening socket. The tag is the dimension the scrape endpoint exports for every counter that came in on that listener. -- **FR-1.3** A listening socket with neither `device=` nor `source=` MUST be tagged with the configured default source string (see +- **FR-1.3** A listening socket with neither `device=` nor `ipng_source_tag=` MUST be tagged with the configured default source string (see `ipng_stats_default_source`, FR-5.3). The default default is the literal string `direct`. -- **FR-1.4** A listening socket with `device=X` but no `source=` MUST be tagged with the interface name `X`. +- **FR-1.4** A listening socket with `device=X` but no `ipng_source_tag=` MUST be tagged with the interface name `X`. - **FR-1.5** Two `listen` directives that share `address:port` but differ in `device=` MUST coexist, and the kernel's TCP socket lookup rules MUST be relied on to dispatch each SYN to the most specific match. The module MUST NOT attempt to duplicate this logic in userspace. 
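+
+  A sketch of the coexistence FR-1.5 relies on, assuming a single attributed interface named `gre-mg1`. The device-bound and wildcard
+  listeners share the same ports; the kernel, not the module, dispatches each incoming SYN to the most specific match:
+
+  ```nginx
+  server {
+      # Connections whose ingress interface is gre-mg1 match these listeners and are tagged mg1.
+      listen 80 device=gre-mg1 ipng_source_tag=mg1;
+      listen [::]:80 device=gre-mg1 ipng_source_tag=mg1;
+
+      # Everything else falls through to the wildcard pair and is tagged with
+      # ipng_stats_default_source ("direct" unless overridden).
+      listen 80;
+      listen [::]:80;
+
+      server_name _;
+  }
+  ```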
@@ -122,14 +115,14 @@ Each requirement carries a unique identifier (`FR-X.Y` or `NFR-X.Y`) so that lat - **FR-3.2** The `ipng_stats` handler MUST support content negotiation via the `Accept` request header: - `Accept: application/json` → JSON output. - `Accept: text/plain` (or anything else, including absent) → Prometheus text exposition format. -- **FR-3.3** The handler MUST support a `source=` query parameter that filters the output to only counters whose source dimension +- **FR-3.3** The handler MUST support a `source_tag=` query parameter that filters the output to only counters whose source dimension equals the supplied tag. The comparison is exact-match and case-sensitive. - **FR-3.4** The handler MUST support a `vip=
` query parameter that filters the output to only counters whose VIP dimension equals the supplied address. The comparison uses the canonicalized form of FR-2.5. - **FR-3.5** Both filters MAY be supplied together; their effect is the intersection. - **FR-3.6** The JSON schema MUST be documented in `docs/scrape-api.md` and MUST version via a top-level `schema` field so that breaking changes can be made additively without bricking existing consumers. -- **FR-3.7** The Prometheus text output MUST use stable metric names prefixed with `nginx_ipng_` and MUST label every series with `source` +- **FR-3.7** The Prometheus text output MUST use stable metric names prefixed with `nginx_ipng_` and MUST label every series with `source_tag` and `vip`. Counter metrics additionally carry a `code` label. **FR-4 Hot path and flush** @@ -148,28 +141,60 @@ Each requirement carries a unique identifier (`FR-X.Y` or `NFR-X.Y`) so that lat default); `size` is a size with suffix (`k`, `m`). The directive is mandatory if the module is loaded. - **FR-5.2** `ipng_stats_flush_interval ` at the `http` level sets the worker flush cadence. Default `1s`. Minimum `100ms`. - **FR-5.3** `ipng_stats_default_source ` at the `http` level sets the tag applied to listening sockets that have neither `device=` - nor `source=`. Default `direct`. + nor `ipng_source_tag=`. Default `direct`. - **FR-5.4** `ipng_stats_buckets ` at the `http` level overrides the default histogram bucket boundaries. Values MUST be strictly increasing positive integers. - **FR-5.5** `ipng_stats on|off` at the `http`, `server`, or `location` level opts a context into or out of counting. Default `on` at the `http` level when the module is loaded. A location serving the `ipng_stats` handler MUST NOT have itself counted (the module automatically sets `off` for the scrape location). -**FR-6 Packaging** +**FR-6 Variables** -- **FR-6.1** The module MUST build as a dynamic module using nginx's `--with-compat --add-dynamic-module=...` flow, against the nginx-dev +- **FR-6.1** The module MUST register an nginx variable `$ipng_source_tag` that resolves to the source tag of the listening socket that + accepted the current connection. For device-bound listeners this is the `ipng_source_tag=` value (or the `device=` name if + `ipng_source_tag=` was not set); for wildcard fallback listeners this is the value of `ipng_stats_default_source`. The variable is + usable in `log_format`, `map`, `add_header`, `if`, and any other nginx context that accepts variables. +- **FR-6.2** `$ipng_source_tag` MUST be available unconditionally when the module is loaded, even if `ipng_stats_zone` is not + configured. It does not depend on the counter subsystem; it only depends on the listen-parameter parsing. Operators who need the VIP + address should use nginx's built-in `$server_addr` variable. + +**FR-7 Packaging** + +- **FR-7.1** The module MUST build as a dynamic module using nginx's `--with-compat --add-dynamic-module=...` flow, against the nginx-dev headers of the target Debian release, so that the resulting `.so` loads into stock upstream nginx on that release without rebuilding nginx itself. -- **FR-6.2** The module MUST ship as a Debian package named `libnginx-mod-http-ipng-stats`, following the `libnginx-mod-http-*` naming +- **FR-7.2** The module MUST ship as a Debian package named `libnginx-mod-http-ipng-stats`, following the `libnginx-mod-http-*` naming convention used by existing third-party nginx modules packaged for Debian. 
-- **FR-6.3** The package MUST install: +- **FR-7.3** The package MUST install: - `/usr/lib/nginx/modules/ngx_http_ipng_stats_module.so` - `/etc/nginx/modules-available/50-mod-http-ipng-stats.conf` containing the `load_module` directive. - A symlink `/etc/nginx/modules-enabled/50-mod-http-ipng-stats.conf → ../modules-available/50-mod-http-ipng-stats.conf` created in the package's postinst. -- **FR-6.4** The package postinst MUST run `nginx -t` after installing the module. If the test fails, postinst MUST remove the +- **FR-7.4** The package postinst MUST run `nginx -t` after installing the module. If the test fails, postinst MUST remove the `modules-enabled` symlink and report a non-fatal warning so that a broken upgrade does not leave the operator's nginx unable to start. +**FR-8 Logtail** + +- **FR-8.1** The module MUST support an `ipng_stats_logtail udp://host:port [buffer=] [flush=]` directive + at the `http` level that registers a global log-phase writer which fires unconditionally for every request, regardless of which + `server` or `location` block handled it. One directive at the `http` level is sufficient to cover all vhosts — operators MUST NOT be + required to repeat an `access_log` directive in every `server` block to achieve a single global access log. +- **FR-8.2** The `` argument MUST be the name of an existing nginx `log_format` defined in the same `http` block before + this directive. The module MUST look up the compiled log format from nginx's log module at configuration time and use it to render each + log line at request time. The module MUST NOT define its own format language; all `$variable` expansion is handled by nginx's standard + log-format machinery, including `$ipng_source_tag` and `$server_addr`. +- **FR-8.3** Each worker MUST buffer log lines in a per-worker memory buffer before transmitting them as UDP datagrams. The buffer size + is controlled by the optional `buffer=` parameter (default `64k`, minimum `1k`). The buffer MUST be flushed when it is full, + when the optional `flush=` timer fires (default `1s`, minimum `100ms`), or when the worker exits. This ensures that a + graceful `nginx -s reload` or a clean worker shutdown transmits all buffered log entries. +- **FR-8.4** The destination argument of `ipng_stats_logtail` MUST be a `udp://host:port` URI, where `host` is a literal IPv4 address + (no hostnames, no IPv6 in v0.1). Each buffer flush is transmitted as a single `sendto()` call on a per-worker `SOCK_DGRAM` socket + opened at worker init and closed at worker exit. If no receiver is listening on the target address and port, the kernel silently + discards the datagram — no error is returned, no disk I/O occurs, and the worker is never blocked. Lost datagrams when no receiver is + present are intentional; the UDP transport is designed for fire-and-forget analytics pipelines where delivery guarantees are + unnecessary and zero disk I/O is preferred over persistence. File-based access logging is not supported by this directive — operators + should use nginx's built-in `access_log` for that purpose. 
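+
+To make the delivery model of FR-8.3 and FR-8.4 concrete, a minimal receiver sketch in Python; the address and port are illustrative
+and must match the `ipng_stats_logtail` destination. Each datagram is one buffer flush and may therefore carry several
+newline-terminated log lines:
+
+```python
+import socket
+
+sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+sock.bind(("127.0.0.1", 9514))  # example destination; match your logtail config
+
+while True:
+    datagram, _addr = sock.recvfrom(65536)
+    # One sendto() per flush on the nginx side: split the payload back into lines.
+    for line in datagram.decode("utf-8", errors="replace").splitlines():
+        print(line)
+```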
+ ### Non-Functional Requirements **NFR-1 Correctness under concurrency** @@ -242,7 +267,7 @@ Each requirement carries a unique identifier (`FR-X.Y` or `NFR-X.Y`) so that lat - **NFR-7.1** The repository MUST ship a `docs/user-guide.md` that walks an operator through installing the Debian package, loading the module, configuring a minimal end-to-end deployment (GRE tunnels, VIPs, `listen` lines, scrape endpoint), verifying that counters are - flowing, and integrating the scrape endpoint with both `maglevd-frontend` and a standalone Prometheus scraper. The user guide is the + flowing, and integrating the scrape endpoint with Prometheus and other consumers. The user guide is the document an operator reads once to get from a freshly-installed package to a working, observable deployment. - **NFR-7.2** The repository MUST ship a `docs/config-guide.md` that enumerates every directive and `listen` parameter introduced by the module, together with the nginx configuration contexts (`http`, `server`, `location`, or `listen`) in which each is legal, the allowed @@ -265,26 +290,31 @@ There is no daemon, no socket the module listens on, no control plane. Everythin Requests enter nginx through one of two listener classes: -1. **Device-bound listeners** (`listen ... device=X source=Y`) accept only connections whose ingress interface is `X`. Each is tagged +1. **Device-bound listeners** (`listen ... device=X ipng_source_tag=Y`) accept only connections whose ingress interface is `X`. Each is tagged with a source string `Y`. 2. **Wildcard fallback listeners** (`listen 80;`, `listen [::]:80;`) accept everything that didn't match a more specific listener. They are tagged with the configured default source (FR-1.3). During request processing nginx behaves exactly as it would without the module: no handler runs early, no header is rewritten. At log -phase, the module's log-phase handler increments the worker-local counter table keyed by `(source, vip, status_code)`. +phase, the module's log-phase handler runs two independent responsibilities: + +1. **Counter update** — increments the worker-local counter table keyed by `(source, vip, status_code)`. +2. **Logtail write** — if `ipng_stats_logtail` is configured (FR-8), renders the named `log_format` for this request and appends the + resulting line to the per-worker write buffer. The buffer is flushed as a UDP datagram on a timer, when full, or on worker exit + (FR-8.3, FR-8.4). This runs for every request regardless of `server` or `location` context. A per-worker timer, firing at the configured flush interval (FR-5.2), walks the dirty keys in the worker-local table and applies their -deltas to the shared-memory zone via atomic adds. +deltas to the shared-memory zone via atomic adds. The same timer triggers a logtail buffer flush if the flush duration has elapsed (FR-8.3). -The scrape handler, when invoked at `GET /ipng-stats` (or whatever location the operator chose), reads the shared-memory zone directly +The scrape handler, when invoked at `GET /.well-known/ipng/statsz` (or whatever location the operator chose), reads the shared-memory zone directly and formats the output per the requested content type. -`maglevd-frontend` fetches the scrape endpoint of each backend in its configured fleet at roughly the same cadence it already uses for -maglevd state. It filters server-side via `?source=` so that it only sees the traffic it delivered. The aggregated view is -rendered alongside the existing maglev status page. 
+Scrape consumers fetch the endpoint at their configured cadence, optionally filtering via `?source_tag=` so that each consumer only +sees the traffic it delivered. -No component in this project writes to anything outside nginx's own memory. In particular, the module does not touch the file system, -does not emit log lines on the request path, and does not speak to any upstream. +Aside from the logtail output (FR-8) — which sends UDP datagrams to a configured receiver — no component in this +project writes to anything outside nginx's own memory. The module does not emit log lines on the request path for the counter subsystem +and does not speak to any upstream. ## Components @@ -295,7 +325,7 @@ dynamic-module ABI. #### Responsibilities -- Parse new `listen` parameters `device=` and `source=` and attach their values to each listening socket's config (FR-1.1, FR-1.2). +- Parse new `listen` parameters `device=` and `ipng_source_tag=` and attach their values to each listening socket's config (FR-1.1, FR-1.2). - Call `setsockopt(SO_BINDTODEVICE)` in the master process at bind time for listeners that set `device=` (FR-1.1, NFR-6.1). - Maintain per-worker private counter tables keyed by `(source_id, vip_id, status_code)` (FR-2.1, NFR-1.1). - Run a per-worker flush timer that moves deltas into the shared-memory zone atomically (FR-4.2, NFR-1.2). @@ -305,22 +335,22 @@ dynamic-module ABI. #### Attribution Model -The module's single novel idea is that per-maglev attribution is done by the Linux kernel's TCP socket lookup, not by any userspace -inspection. Each `maglevd` instance terminates its GRE tunnel on a dedicated interface on the nginx host; the operator writes one -`listen ... device= source=` line per `(family, tunnel)` pair. The kernel binds that listening socket with `SO_BINDTODEVICE`, -which causes it to match only connections whose ingress interface is that tunnel. A wildcard `listen 80;` and `listen [::]:80;` pair -provides the fallback for traffic arriving on any other interface — typically normal web traffic, not from maglev. +The module's single novel idea is that per-device attribution is done by the Linux kernel's TCP socket lookup, not by any userspace +inspection. Each traffic source that should be tracked separately terminates on a dedicated interface on the nginx host; the operator +writes one `listen ... device= ipng_source_tag=` line per `(family, interface)` pair. The kernel binds that listening socket +with `SO_BINDTODEVICE`, which causes it to match only connections whose ingress interface is that device. A wildcard `listen 80;` and +`listen [::]:80;` pair provides the fallback for traffic arriving on any other interface — typically normal web traffic. The kernel's TCP listener lookup prefers a more-specific (device-matching) listener over a less-specific (wildcard) one, so the fallback and the device-bound listeners coexist without conflicts. The module does not need to duplicate this logic and does not try to. -Because the `device=` binding uses a wildcard address, the module does not need to know the set of VIPs served through each tunnel. +Because the `device=` binding uses a wildcard address, the module does not need to know the set of VIPs served through each interface. Adding a VIP (binding an address to `lo` and writing a new `server_name` block) does not require touching the `listen` lines. Adding a -new maglev instance (a new GRE tunnel) does. 
This is the correct split: VIPs are vhost-level concerns and change often; maglev instances -are fleet-level concerns and change rarely. +new attributed interface does. This is the correct split: VIPs are vhost-level concerns and change often; interfaces are +infrastructure-level concerns and change rarely. -The design assumes GRE tunnels used as `device=` sources carry **only** maglev-originated traffic. Any other traffic arriving on such an -interface is silently misattributed to that maglev's source tag. This is a deployment invariant, not a defect. +The design assumes interfaces used as `device=` sources carry **only** traffic from the expected source. Any other traffic arriving on +such an interface is silently misattributed to that interface's source tag. This is a deployment invariant, not a defect. #### Counter Data Model @@ -344,7 +374,7 @@ endpoint can recover the original strings without re-parsing configuration. String interning is capacity-bounded: the zone is sized by the operator, and once capacity is exhausted new keys are dropped with a counter bump and an infrequent log line (NFR-3.1). In practice, the number of distinct VIPs on a single nginx host is small (tens, maybe -low hundreds), and the number of distinct source tags is the number of maglev instances (single digits). The dominant factor is +low hundreds), and the number of distinct source tags is the number of attributed interfaces (single digits). The dominant factor is `status_code`; ~60 keys per VIP is a typical steady state. #### Hot Path @@ -408,7 +438,7 @@ The worker never walks the entire table — only dirty slots — so idle VIPs co The `ipng_stats` handler is a leaf content handler. It: -1. Parses `?source=` and `?vip=` into exact-match filters. +1. Parses `?source_tag=` and `?vip=` into exact-match filters. 2. Parses `Accept:` to pick output format. 3. Walks the shared-memory zone under a shared lock (readers hold the read side of a rwlock; flushes and interners hold the write side briefly). @@ -423,10 +453,10 @@ fixed-size buffer per chain link and requests new links only when full. - **One nginx content handler**, `ipng_stats`, usable in any `location` block. Serves Prometheus text and JSON, filtered by optional query parameters. -- **Two new `listen` parameters**, `device=` and `source=`, usable anywhere a `listen` directive is used. +- **Two new `listen` parameters**, `device=` and `ipng_source_tag=`, usable anywhere a `listen` directive is used. - **Five new `http`-level directives**: `ipng_stats_zone`, `ipng_stats_flush_interval`, `ipng_stats_default_source`, `ipng_stats_buckets`, `ipng_stats` (on/off). -- **A Prometheus metric family** prefixed `nginx_ipng_*`, labelled `source`, `vip`, and (for request counters) `code`. +- **A Prometheus metric family** prefixed `nginx_ipng_*`, labelled `source_tag`, `vip`, and (for request counters) `code`. **Consumes.** @@ -442,10 +472,10 @@ Debian is the target and upstream nginx on Debian is the platform. #### Responsibilities - Build the module against the target release's nginx-dev headers with `--with-compat` (NFR-5.1, NFR-5.3). -- Install the compiled `.so` into `/usr/lib/nginx/modules` (FR-6.3). +- Install the compiled `.so` into `/usr/lib/nginx/modules` (FR-7.3). - Drop a `load_module` stanza into `/etc/nginx/modules-available/` and enable it by default via a symlink in `modules-enabled/` - (FR-6.3). -- Sanity-check the resulting config with `nginx -t` in the postinst and back out cleanly if it fails (FR-6.4). + (FR-7.3). 
+- Sanity-check the resulting config with `nginx -t` in the postinst and back out cleanly if it fails (FR-7.4). #### Build @@ -476,12 +506,13 @@ No nginx binary is produced, shipped, or touched. The package is strictly additi A typical deployment on a single nginx host looks like: -- One GRE tunnel per maglev instance, terminated on the nginx host by the operator's networking layer (systemd-networkd, Netplan, or a - hand-rolled interface config). Interface names follow a consistent pattern, typically `gre-` — e.g. `gre-mg1`, `gre-mg2`. -- VIPs bound to a local dummy or loopback interface so the kernel accepts inner packets destined for them. -- A hand-maintained `listen` include file with one device-bound listen per `(family, tunnel)` pair, reused across vhosts. +- One interface per traffic source that should be separately attributed (e.g. GRE tunnels, VLANs), set up by the operator's networking + layer (systemd-networkd, Netplan, or a hand-rolled interface config). Interface names follow a consistent pattern, typically + `gre-` — e.g. `gre-mg1`, `gre-mg2`. +- VIPs bound to a local dummy or loopback interface so the kernel accepts packets destined for them. +- A hand-maintained `listen` include file with one device-bound listen per `(family, interface)` pair, reused across vhosts. - Fallback `listen 80;` and `listen [::]:80;` in whichever server blocks serve direct web traffic. -- A single scrape location, e.g. `location = /ipng-stats`, served from a locked-down server block that only allows the maglev fleet and +- A single scrape location, e.g. `location = /.well-known/ipng/statsz`, served from a locked-down server block that only allows scrape consumers and the local Prometheus scraper. ### Configuration @@ -497,7 +528,7 @@ http { server { listen 80; listen [::]:80; - include /etc/nginx/ipng-maglev/listens.conf; + include /etc/nginx/ipng-stats/listens.conf; server_name _; # ... normal vhost content @@ -505,17 +536,17 @@ http { server { listen 127.0.0.1:9113; - location = /ipng-stats { + location = /.well-known/ipng/statsz { ipng_stats; allow 127.0.0.1; - allow 2001:db8::/48; # maglev fleet + allow 2001:db8::/48; # scrape consumers deny all; } } } ``` -`listens.conf` is eight lines (two families × four maglevs) and stable across vhost changes. +`listens.conf` is two lines per attributed interface (two address families each) and stable across vhost changes. ### Nginx Reload Semantics @@ -550,15 +581,15 @@ some other endpoint. - **nginx master crash / package upgrade.** The shared zone is torn down with the old master. When the new master starts, the zone is recreated empty. Counters start from zero. Consumers that need history SHOULD read from Prometheus, which retains history across restarts. -- **Device disappears.** If an operator removes a GRE tunnel without removing its `listen` line, nginx's bind will fail on the next +- **Device disappears.** If an operator removes an interface without removing its `listen` line, nginx's bind will fail on the next reload and the reload will error cleanly. The module does not hide this; a failing `nginx -t` is the right answer. - **Traffic on a wildcard listener that should have been device-bound.** The traffic is counted under `direct` (or the configured - default). This is detectable: if the operator expects zero traffic under `direct` and the dashboard shows non-zero, a maglev instance - is probably missing from the `listen` include. + default). 
This is detectable: if the operator expects zero traffic under `direct` and the dashboard shows non-zero, an interface is + probably missing from the `listen` include. - **Slow scrape on a large zone.** Scrape cost is linear in the number of keys (NFR-2.3). On a host with a very large VIP count, the operator SHOULD increase the flush interval, lower the scrape frequency, or both. The module does not cap scrape runtime. -- **Maglev frontend is down.** The module is unaffected; its counters continue to increment and the Prometheus scrape continues to work. - When the frontend comes back, it resumes fetching. No state is lost. +- **Scrape consumer is down.** The module is unaffected; its counters continue to increment and the Prometheus scrape continues to work. + When the consumer comes back, it resumes fetching. No state is lost. ### Security @@ -586,18 +617,16 @@ some other endpoint. decapsulation; the outer and inner conntrack entries are independent and mark does not cross. Even if tagging worked, `SO_MARK` on an accepted socket does not reflect incoming packet or conntrack mark without a per-packet `libnetfilter_conntrack` lookup, which is too heavy for a log-phase handler. -- **Attribution via multiple GRE tunnels and CONNMARK.** Rejected as strictly worse than `SO_BINDTODEVICE`: it still requires per-maglev +- **Attribution via multiple GRE tunnels and CONNMARK.** Rejected as strictly worse than `SO_BINDTODEVICE`: it still requires per-source tunnels, still needs nginx to read the mark (hard), and adds a netfilter dependency. `SO_BINDTODEVICE` solves the same problem with kernel primitives nginx already knows about. - **Attribution via eBPF `SO_REUSEPORT` programs.** Rejected as dramatic overkill for a problem the kernel already solves for free via socket-lookup specificity. -- **Per-VIP enumeration in `listen` directives.** Rejected in favor of wildcard `listen 80 device=gre-mg1;`. The wildcard form works +- **Per-VIP enumeration in `listen` directives.** Rejected in favor of wildcard `listen 80 device=gre-mg1 ipng_source_tag=mg1;`. The wildcard form works because nginx routes by `server_name` post-accept, so the `listen` only needs to express `(port, device)` and does not need the VIP address. This makes the generated include file size independent of the VIP count. -- **Pushing counters from the module into `maglevd` over gRPC.** Rejected. It inverts the wait-for graph (maglevd's design doc is - careful to keep the daemon free of callbacks from the backends), it complicates restart neutrality, and it adds a gRPC client to a C - module. Pull-based scrape keeps maglevd out of the traffic-metrics business, matches the doc's philosophy, and lets the frontend use - its existing per-server goroutine model. +- **Pushing counters to an external daemon over gRPC.** Rejected. It complicates restart neutrality and adds a gRPC client dependency to + a C module. Pull-based scrape is simpler: consumers fetch when they want, and the module has no outbound connections. - **Shipping separate JSON and Prometheus handlers.** Rejected. Content negotiation on one handler is simpler to configure and serves both audiences from one ACL. @@ -609,5 +638,5 @@ some other endpoint. - **TLS handshake metrics.** The module reports `request_duration` from the start of the HTTP request, not from TCP accept. For TLS-terminating frontends a handshake-time fraction is invisible. Adding a `tls_handshake_duration` histogram is deferred until operators ask for it. 
-- **`maglevd-frontend` fetch cadence.** Whichever cadence the frontend adopts for traffic counters — the existing ~one-second refresh, - or an SSE bridge layered on top — the plugin supports it. The choice is on the frontend side. +- **Consumer fetch cadence.** Whichever cadence a consumer adopts for traffic counters — a one-second refresh, a longer Prometheus + scrape interval, or an SSE bridge layered on top — the plugin supports it. The choice is on the consumer side. diff --git a/docs/user-guide.md b/docs/user-guide.md new file mode 100644 index 0000000..be42801 --- /dev/null +++ b/docs/user-guide.md @@ -0,0 +1,384 @@ + +# nginx-ipng-stats-plugin — User Guide + +This document walks an operator through installing the plugin, deploying it on a single nginx host serving traffic that arrives on +distinct interfaces (GRE tunnels, VLANs, bonded links, or plain ethernet), verifying that counters are flowing, and hooking up the +scrape endpoint to Prometheus and other consumers. + +It covers (NFR-7.1): + +1. Installing the Debian package. +2. Setting up interfaces for per-device attribution (GRE tunnel example). +3. Writing a minimal nginx configuration. +4. Verifying with `curl`. +5. Scraping from Prometheus. +6. Setting up a global logtail access log. +7. Integrating with scrape consumers. + +For a directive-by-directive reference, read [`config-guide.md`](config-guide.md) alongside this guide. + +## 1. Install the package + +On Debian Trixie (and newer), the module is distributed as `libnginx-mod-http-ipng-stats`. The package depends on the stock `nginx` +package and loads cleanly into it without recompiling nginx itself. + +``` +sudo apt install ./libnginx-mod-http-ipng-stats_0.1.0-1_amd64.deb +``` + +The package will: + +- Drop `ngx_http_ipng_stats_module.so` into `/usr/lib/nginx/modules/`. +- Place a `load_module` stanza in `/etc/nginx/modules-available/50-mod-http-ipng-stats.conf`. +- Symlink it into `/etc/nginx/modules-enabled/` so nginx picks it up on the next reload. +- Run `nginx -t` and, if the test fails, remove the `modules-enabled` symlink and print a warning — so a broken upgrade never leaves + you with an nginx that cannot start. + +Confirm the module is loaded: + +``` +nginx -V 2>&1 | grep -o ngx_http_ipng_stats_module +``` + +## 2. Set up interfaces for per-device attribution + +The plugin attributes traffic by watching which interface the request came in on, using `SO_BINDTODEVICE` on per-interface listening +sockets. For this to work, each traffic source that should be tracked separately MUST arrive on its own interface. + +This works with any kind of Linux interface — GRE tunnels, VLANs, VXLANs, bonded links, or plain ethernet. This guide uses GRE +tunnels as the example, but the module does not care about the interface type. + +This guide doesn't prescribe a specific networking layer — use whatever your host already uses (`systemd-networkd`, Netplan, +`/etc/network/interfaces`, or a hand-rolled script). The only hard requirement is: + +- Each traffic source that should be separately attributed gets its own interface on the nginx host. +- Interfaces follow a consistent naming pattern. For GRE tunnels we recommend `gre-`, e.g. `gre-mg1`, `gre-mg2`. +- The VIPs are bound to a local dummy or loopback interface so the kernel accepts packets destined for them. 
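+
+For the curious, the sketch below shows the kernel mechanism the module relies on — roughly what it does to each device-bound
+listening socket at startup. It is a standalone illustration, not the module's code (the module applies the same
+`setsockopt(SO_BINDTODEVICE)` to nginx's own listeners); the interface name `gre-mg1` and port 80 are just the example values used
+throughout this guide, and the call needs `CAP_NET_RAW` (or root), which the nginx master process already has.
+
+```c
+/* Listen on *:80, but only for connections that ingress on gre-mg1. */
+#include <netinet/in.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+int main(void)
+{
+    const char *dev = "gre-mg1";                     /* example interface */
+    int fd = socket(AF_INET, SOCK_STREAM, 0);
+    if (fd < 0) { perror("socket"); return 1; }
+
+    if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, dev, strlen(dev) + 1) < 0) {
+        perror("setsockopt(SO_BINDTODEVICE)");       /* needs CAP_NET_RAW */
+        return 1;
+    }
+
+    struct sockaddr_in sa;
+    memset(&sa, 0, sizeof(sa));
+    sa.sin_family = AF_INET;
+    sa.sin_addr.s_addr = htonl(INADDR_ANY);          /* wildcard: no VIP list */
+    sa.sin_port = htons(80);
+
+    if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0 || listen(fd, 128) < 0) {
+        perror("bind/listen");
+        return 1;
+    }
+
+    /* The kernel now delivers only SYNs arriving on gre-mg1 to this socket;
+     * a plain wildcard listener elsewhere receives everything else. */
+    pause();
+    close(fd);
+    return 0;
+}
+```
+
+The packaged module performs this call from nginx's master process when the listening sockets are created, so nothing extra is
+needed at runtime — the rest of this section is only about getting the interfaces themselves in place.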
+ +For example, with `systemd-networkd`, a GRE tunnel to a remote peer at `2001:db8::1` from this host at `2001:db8::100` looks like: + +``` +# /etc/systemd/network/10-gre-mg1.netdev +[NetDev] +Name=gre-mg1 +Kind=ip6gre + +[Tunnel] +Local=2001:db8::100 +Remote=2001:db8::1 +TTL=64 +``` + +``` +# /etc/systemd/network/10-gre-mg1.network +[Match] +Name=gre-mg1 + +[Network] +LinkLocalAddressing=no +``` + +Repeat for each additional tunnel. A trimmed-down variant of this scheme is what IPng uses in production. + +Verify the interfaces exist and carry traffic: + +``` +ip -6 tunnel show | grep gre-mg +ip -6 -s link show gre-mg1 +``` + +## 3. Write the nginx configuration + +The plugin needs three things in `nginx.conf`: + +1. A shared-memory zone for counters (`ipng_stats_zone`). +2. A set of `listen` directives — a wildcard fallback plus one device-bound listener per attributed interface. +3. A scrape location serving the `ipng_stats` handler. + +A minimal working configuration looks like this: + +```nginx +load_module modules/ngx_http_ipng_stats_module.so; + +events { + worker_connections 4096; +} + +http { + ipng_stats_zone ipng:4m; + ipng_stats_flush_interval 1s; + ipng_stats_default_source direct; + + # A normal vhost. The fallback listen lines serve direct web traffic; + # the included file adds one device-bound listen per attributed interface. + server { + listen 80; + listen [::]:80; + include /etc/nginx/ipng-stats/listens.conf; + + server_name _; + root /var/www/html; + } + + # A second server block exposing the scrape endpoint on a locked-down port. + server { + listen 127.0.0.1:9113; + listen [::1]:9113; + + location = /.well-known/ipng/statsz { + ipng_stats; + allow 127.0.0.1; + allow ::1; + allow 2001:db8::/48; # your scrape consumers + deny all; + } + } +} +``` + +And `/etc/nginx/ipng-stats/listens.conf` — the hand-maintained include file — is two lines per attributed interface (one per address +family): + +```nginx +listen 80 device=gre-mg1 ipng_source_tag=mg1; +listen [::]:80 device=gre-mg1 ipng_source_tag=mg1; +listen 80 device=gre-mg2 ipng_source_tag=mg2; +listen [::]:80 device=gre-mg2 ipng_source_tag=mg2; +listen 80 device=gre-mg3 ipng_source_tag=mg3; +listen [::]:80 device=gre-mg3 ipng_source_tag=mg3; +listen 80 device=gre-mg4 ipng_source_tag=mg4; +listen [::]:80 device=gre-mg4 ipng_source_tag=mg4; +``` + +Test and reload: + +``` +sudo nginx -t +sudo nginx -s reload +``` + +If `nginx -t` complains about an unknown `listen` parameter (`device=` or `ipng_source_tag=`), the module isn't loaded — check step 1. + +### Why wildcard listens? + +You do not need to enumerate VIPs in `listen`. A wildcard `listen 80 device=gre-mg1 ipng_source_tag=mg1;` accepts any local address +served through the `gre-mg1` interface, and nginx routes per-request to the right vhost by `server_name` / `Host:` header. Adding a new +VIP is a `server_name` change; adding a new interface is an append to `listens.conf`. + +### Why both a wildcard and device-bound listens? + +The fallback `listen 80;` / `listen [::]:80;` catches traffic arriving on any interface that isn't one of your attributed interfaces — +for example, real clients hitting your host directly over `eth0`. The kernel's TCP socket lookup prefers the most-specific +(device-matching) listener, so a SYN on `gre-mg1` always lands on the `mg1` socket, and a SYN on `eth0` always lands on the fallback. +No races, no stealing. Direct traffic is counted under the tag set by `ipng_stats_default_source` (`direct` by default). + +## 4. 
Verify with curl + +Generate some traffic (or wait for real traffic), then scrape the endpoint locally: + +``` +curl -s http://127.0.0.1:9113/.well-known/ipng/statsz +``` + +Default output is Prometheus text format: + +``` +# HELP nginx_ipng_requests_total Total HTTP requests, per (source_tag, vip, code). +# TYPE nginx_ipng_requests_total counter +nginx_ipng_requests_total{source_tag="mg1",vip="192.0.2.10",code="200"} 12345 +nginx_ipng_requests_total{source_tag="mg1",vip="192.0.2.10",code="404"} 17 +nginx_ipng_requests_total{source_tag="mg2",vip="192.0.2.10",code="200"} 9876 +nginx_ipng_requests_total{source_tag="direct",vip="192.0.2.10",code="200"} 42 +# HELP nginx_ipng_bytes_in_total Request bytes received, per (source_tag, vip, code). +# TYPE nginx_ipng_bytes_in_total counter +nginx_ipng_bytes_in_total{source_tag="mg1",vip="192.0.2.10",code="200"} 9876543 +# ... and so on +``` + +For JSON output instead, set the `Accept` header: + +``` +curl -s -H 'Accept: application/json' http://127.0.0.1:9113/.well-known/ipng/statsz | jq . +``` + +To filter server-side to a single source tag: + +``` +curl -s 'http://127.0.0.1:9113/.well-known/ipng/statsz?source_tag=mg1' +curl -s 'http://127.0.0.1:9113/.well-known/ipng/statsz?source_tag=mg1&vip=192.0.2.10' +``` + +If you see `source_tag="direct"` entries with non-zero counts and you expected all traffic to come in via attributed interfaces, +something is routing around them — typically an interface that isn't in `listens.conf`, or an interface that's down. + +## 5. Scrape from Prometheus + +The same endpoint serves Prometheus text by default. Add a scrape job: + +```yaml +# /etc/prometheus/prometheus.yml +scrape_configs: + - job_name: nginx-ipng + scrape_interval: 15s + static_configs: + - targets: + - 'nginx-backend-1.example.com:9113' + - 'nginx-backend-2.example.com:9113' + metrics_path: /.well-known/ipng/statsz +``` + +You'll want to add `nginx-backend-*` to your `allow` rules in the scrape server block, or front the plugin with a TLS-terminating +reverse proxy. The module does not ship its own auth; the nginx `allow`/`deny` ACL is your access control. + +Typical PromQL queries: + +``` +# Requests per second per source, per VIP: +sum by (source_tag, vip) (rate(nginx_ipng_requests_total[1m])) + +# 5xx error rate per VIP, aggregated across all sources: +sum by (vip) (rate(nginx_ipng_requests_total{code=~"5.."}[5m])) + / +sum by (vip) (rate(nginx_ipng_requests_total[5m])) + +# p95 request duration per (source_tag, vip): +histogram_quantile(0.95, + sum by (source_tag, vip, le) (rate(nginx_ipng_request_duration_seconds_bucket[5m]))) +``` + +## 6. Set up a global logtail access log + +Operators who want a single unified access log covering all traffic — regardless of which `server` block handled the request — normally +have to repeat `access_log` in every `server {}` block or rely on a catch-all virtual host. The `ipng_stats_logtail` directive removes +that requirement: one line at the `http` level registers a global log-phase writer that fires unconditionally for every request (FR-8.1). + +The logtail sends each buffer flush as a single UDP datagram to a `host:port`. Zero disk I/O, no backpressure, no blocking if the +receiver is down. This makes it ideal for fire-and-forget analytics pipelines where delivery guarantees are unnecessary and disk writes +would add unwanted I/O pressure. For file-based access logging, use nginx's built-in `access_log` directive. + +### Define the log format + +Add a `log_format` declaration inside the `http { ... 
}` block, **before** the `ipng_stats_logtail` directive that references it: + +```nginx +log_format logtail '$host\t$remote_addr\t$ipng_source_tag\t$server_addr\t' + '$request_method\t$request_uri\t$status\t$body_bytes_sent\t' + '$request_time'; +``` + +Any nginx variable is usable here, including `$ipng_source_tag` (the device attribution tag, FR-6.1) and `$server_addr` (the VIP +that received the request). + +### Configuration + +```nginx +http { + ipng_stats_zone ipng:4m; + + log_format logtail '$host\t$remote_addr\t$ipng_source_tag\t$server_addr\t' + '$request_method\t$request_uri\t$status\t$body_bytes_sent\t' + '$request_time'; + + ipng_stats_logtail logtail udp://127.0.0.1:9514 buffer=16k flush=1s; + + server { ... } +} +``` + +- **`logtail`** (first argument) — the `log_format` name. +- **`udp://127.0.0.1:9514`** — destination as a `udp://host:port` URI. `host` must be a literal IPv4 address (no hostnames, no IPv6 + in v0.1). +- **`buffer=16k`** — per-worker write buffer. Lines are held in memory until the buffer fills, the flush timer fires, or the worker + exits. Default is `64k`; minimum is `1k` (FR-8.3). +- **`flush=1s`** — maximum age of buffered data before it is sent. Default is `1s`; minimum is `100ms` (FR-8.3). + +Each buffer flush becomes a single `sendto()` on a per-worker `SOCK_DGRAM` socket. When the flush timer fires (or the buffer fills), +the entire buffered payload is sent as one datagram — no file open, no `write()`, no `fsync()`. If no receiver is listening, the kernel +drops the datagram silently and the worker carries on. This is by design: the logtail exists for non-critical analytics pipes where +lost datagrams are acceptable and disk I/O is not. + +**Constraints (v0.1):** + +- `host` must be a literal IPv4 address. Hostnames and IPv6 are not supported yet. +- Large `buffer=` values produce large datagrams. On the loopback interface the practical ceiling is ~64 KB, well above typical + configured buffer sizes. On routed paths, path MTU applies. +- There is no acknowledgment, retry, or sequence number. If the receiver is down, the data is gone. + +**Starting a receiver** is trivial: + +```bash +# Quick one-shot inspection: +nc -u -l 127.0.0.1 9514 +``` + +For a production-ready logtail consumer, see [`nginx-logtail`](https://git.ipng.ch/ipng/nginx-logtail), which receives the UDP +datagram stream and processes it into structured log output. + +A typical received log line (with the format above, tab-separated) looks like: + +``` +example.com 203.0.113.42 mg1 192.0.2.10 GET /index.html 200 4321 0.003 +``` + +The third field (`mg1`) comes from `$ipng_source_tag` — free per-device attribution in every log line. + +### Why this complements per-server `access_log` + +A conventional nginx access log requires the operator to repeat `access_log /path/to/file logtail;` in every `server {}` block that +should be captured. This is error-prone: adding a new vhost and forgetting the directive means that vhost's traffic is silently absent +from the log. `ipng_stats_logtail` is installed at the module's log-phase hook, which nginx calls for every request with no per-server +configuration required. + +See [`config-guide.md`](config-guide.md#ipng_stats_logtail-format_name-udphostport-buffersize-flushduration) for the full directive +reference and FR-8 for the requirements behind this feature. + +## 7. Integrate with scrape consumers + +The scrape endpoint (`ipng_stats;`) serves both Prometheus text and JSON from a single location. 
Any HTTP client that can issue a GET +request can consume it. Two integration patterns are common: + +### Prometheus + +See section 5 above. Prometheus scrapes the endpoint at a configured interval and stores the time series. This is the simplest +integration and covers most monitoring and alerting use cases. + +### Custom consumers + +The `?source_tag=` query parameter lets a consumer filter the scrape response to only the traffic attributed to a specific source. +This is useful when multiple consumers share the same nginx backends — each consumer scrapes with its own tag and never sees the +others' traffic. + +The JSON output (`Accept: application/json`) includes a top-level `schema` field for versioning, making it straightforward to parse +from any language. + +Once wired, a consumer can derive from the scrape data: + +- Live QPS per backend (from the EWMA gauges). +- Status-code mix per backend (from the counter families). +- p50/p95 latency per backend (from the duration histogram). +- Traffic volume per backend (from the bytes counters). + +For an example of this pattern in a GRE tunnel fleet, see [`vpp-maglev`](https://git.ipng.ch/ipng/vpp-maglev), whose frontend scrapes +each nginx backend filtered by source tag to show per-backend traffic alongside health state. + +## Troubleshooting + +**`nginx -t` reports "unknown listen parameter: device=" or "unknown listen parameter: ipng_source_tag=".** The module isn't loaded. +Check `/etc/nginx/modules-enabled/` for the `50-mod-http-ipng-stats.conf` symlink and re-run `nginx -t`. + +**All traffic is attributed to `direct` even though device-bound interfaces exist.** The interface names don't match the `device=` +values in `listens.conf`, or the interfaces aren't up. Run `ip -br link` and confirm the interface names match. + +**Counters reset after every reload.** They should survive `nginx -s reload`. If they don't, check that the `ipng_stats_zone` name in +`nginx.conf` is stable across reloads — renaming the zone forces a new shared-memory segment. + +**`nginx_ipng_zone_full_events_total` is non-zero.** The shared-memory zone is too small for your VIP count. Increase the size in +`ipng_stats_zone ipng:` (default 4 MB is enough for ~hundreds of VIPs with the full status-code set). + +**`curl http://127.0.0.1:9113/.well-known/ipng/statsz` returns "403 Forbidden".** The `allow`/`deny` ACL is blocking your source address. Either add +yourself or scrape from a host already in the allow list. + +## Where to go next + +- [`config-guide.md`](config-guide.md) — every directive and `listen` parameter with contexts, allowed values, and defaults. +- [`design.md`](design.md) — full design document, including the attribution model, hot-path cost analysis, and failure modes. diff --git a/src/ngx_http_ipng_stats_module.c b/src/ngx_http_ipng_stats_module.c new file mode 100644 index 0000000..6f4ba52 --- /dev/null +++ b/src/ngx_http_ipng_stats_module.c @@ -0,0 +1,2265 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2026 Pim van Pelt + * + * ngx_http_ipng_stats_module — per-VIP, per-device traffic counters for + * nginx hosts receiving traffic on distinct interfaces. + * + * See docs/design.md in the repository for the full design. The short + * version is: + * + * - Attribution is done by the Linux kernel's TCP socket lookup, via + * SO_BINDTODEVICE on per-tunnel listening sockets. 
Each `listen` + * directive may carry `device=` and `ipng_source_tag=` + * parameter; this module parses them by replacing the stock + * ngx_http_core_module `listen` command handler at preconfig time. + * + * - Counters are maintained per-worker in a private table (no locks, + * no atomics on the request path) and flushed into a shared-memory + * zone via a per-worker timer. The scrape handler reads only from + * the shared zone. + * + * - The scrape handler content-negotiates between Prometheus text and + * JSON output, filtered server-side by optional `?source_tag=` and + * `?vip=` query parameters. + */ + +#include +#include +#include + +#include +#include +#include + + +/* The log module's op/format types are private to ngx_http_log_module.c + * (no public header). We duplicate the struct layouts under our own + * names so we can call the compiled log_format ops. The layouts have + * been stable since nginx 0.7.x. */ + +typedef struct ipng_log_op_s ipng_log_op_t; + +typedef u_char *(*ipng_log_op_run_pt)(ngx_http_request_t *r, u_char *buf, + ipng_log_op_t *op); +typedef size_t (*ipng_log_op_getlen_pt)(ngx_http_request_t *r, + uintptr_t data); + +struct ipng_log_op_s { + size_t len; + ipng_log_op_getlen_pt getlen; + ipng_log_op_run_pt run; + uintptr_t data; +}; + +typedef struct { + ngx_str_t name; + ngx_array_t *flushes; + ngx_array_t *ops; /* array of ipng_log_op_t */ +} ipng_log_fmt_t; + +typedef struct { + ngx_array_t formats; /* array of ipng_log_fmt_t */ + ngx_uint_t combined_used; +} ipng_log_main_conf_t; + +extern ngx_module_t ngx_http_log_module; + + +#define NGX_HTTP_IPNG_STATS_VERSION "0.1.0" +#define NGX_HTTP_IPNG_STATS_SCHEMA_VERSION 1 + +/* Default histogram buckets in milliseconds (FR-2.3). */ +#define NGX_HTTP_IPNG_STATS_DEFAULT_BUCKETS \ + { 1, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000 } +#define NGX_HTTP_IPNG_STATS_DEFAULT_BUCKET_COUNT 12 + +/* Number of status code slots. We keep one lane per HTTP status code in + * [100, 599]; codes outside this range are clamped into a single + * "unknown" bucket (NFR-3.3). */ +#define NGX_HTTP_IPNG_STATS_CODE_MIN 100 +#define NGX_HTTP_IPNG_STATS_CODE_MAX 599 +#define NGX_HTTP_IPNG_STATS_CODE_UNKNOWN 0 + +/* A single per-worker slot key. */ +typedef struct { + ngx_str_t source; /* points into shared-zone interning table */ + ngx_str_t vip; /* ditto */ + ngx_uint_t code; /* 0 for unknown, else full 3-digit code */ +} ngx_http_ipng_stats_key_t; + + +/* Shared-zone node: stored in an rbtree keyed by a 32-bit hash of the + * (source, vip, code) tuple. Collisions are resolved by walking the + * rbtree's duplicate chain and comparing full keys. + * + * Counter lanes are 64-bit and updated via relaxed atomic fetch_add + * (NFR-1.2). Histogram lanes live in an ngx_palloc'd tail allocation + * whose size is determined by the configured bucket count. */ +typedef struct { + ngx_rbtree_node_t rbnode; /* key = hash */ + ngx_queue_t lru; /* in-zone LRU for eviction on rename (NFR-4.4) */ + ngx_uint_t source_id; + ngx_uint_t vip_id; + ngx_uint_t code; + ngx_atomic_uint_t requests; + ngx_atomic_uint_t bytes_in; + ngx_atomic_uint_t bytes_out; + ngx_atomic_uint_t duration_sum_ms; + ngx_atomic_uint_t upstream_sum_ms; + /* Followed by 2 * (nbuckets + 1) ngx_atomic_uint_t histogram lanes: + * lanes[0 .. nbuckets] -> request duration + * lanes[nbuckets+1 .. 2*nbuckets+1] -> upstream duration + */ +} ngx_http_ipng_stats_node_t; + + +/* Per-worker local slot — identical shape to the shared node but without + * atomics. 
We maintain a small dynamic array of these, plus a "dirty + * list" head pointing into the array, so that the flush tick only walks + * entries touched since the last flush (FR-4.2, NFR-2.2). */ +typedef struct ngx_http_ipng_stats_slot_s { + ngx_uint_t hash; + ngx_uint_t source_id; + ngx_uint_t vip_id; + ngx_uint_t code; + + /* Deltas since last flush. */ + uint64_t requests; + uint64_t bytes_in; + uint64_t bytes_out; + uint64_t duration_sum_ms; + uint64_t upstream_sum_ms; + uint64_t *dhist; /* nbuckets+1 lanes */ + uint64_t *uhist; + + /* Intrusive "dirty" linked list — dirty_next == NULL and + * !is_dirty_head means "not on the list". */ + struct ngx_http_ipng_stats_slot_s *dirty_next; + unsigned dirty:1; +} ngx_http_ipng_stats_slot_t; + + +/* String interning tables live at the head of the shared-memory zone. + * They're flat arrays of ngx_str_t whose data pointers reference memory + * allocated from the zone's slab pool. Workers look up strings by + * sequential scan — the cardinality is small (tens) so this is fine. */ +typedef struct { + ngx_str_t *entries; + ngx_uint_t nelts; + ngx_uint_t nalloc; +} ngx_http_ipng_stats_intern_t; + + +/* Header of the shared-memory zone. */ +typedef struct { + uint32_t magic; /* 0x49504E47 "IPNG" when initialized */ + ngx_rbtree_t rbtree; + ngx_rbtree_node_t sentinel; + ngx_queue_t lru; + ngx_http_ipng_stats_intern_t sources; + ngx_http_ipng_stats_intern_t vips; + + /* Meta-counters for the plugin itself (FR-6 observability of + * the plugin in the design doc). */ + ngx_atomic_uint_t zone_full_events; + ngx_atomic_uint_t flushes_total; +} ngx_http_ipng_stats_shctx_t; + + +/* Per-listen binding recorded by the listen wrapper at config parse + * time. Resolved to an ngx_listening_t* at init_module time. */ +typedef struct { + ngx_str_t device; + ngx_str_t source; + ngx_sockaddr_t sockaddr; + socklen_t socklen; + ngx_listening_t *listening; /* filled at init_module */ +} ngx_http_ipng_stats_binding_t; + + +typedef struct { + ngx_shm_zone_t *shm_zone; + ngx_str_t zone_name; + size_t zone_size; + ngx_msec_t flush_interval; + ngx_str_t default_source; + ngx_uint_t nbuckets; + ngx_uint_t *bucket_bounds_ms; /* len = nbuckets */ + ngx_array_t *bindings; /* ngx_http_ipng_stats_binding_t */ + ngx_flag_t enabled; + + /* Global logtail (FR-8) — UDP-only. */ + ipng_log_fmt_t *logtail_fmt; /* compiled ops from log_format */ + struct sockaddr_in logtail_udp_addr; /* destination address */ + size_t logtail_buf_size; /* per-worker buffer, default 64k */ + ngx_msec_t logtail_flush; /* max flush interval, default 1s */ +} ngx_http_ipng_stats_main_conf_t; + + +typedef struct { + ngx_flag_t enabled; + ngx_flag_t is_scrape_handler; +} ngx_http_ipng_stats_loc_conf_t; + + +/* Per-worker runtime state. */ +typedef struct { + ngx_http_ipng_stats_slot_t *slots; + ngx_uint_t nslots; + ngx_uint_t nalloc; + ngx_http_ipng_stats_slot_t *dirty_head; + ngx_event_t flush_ev; + ngx_log_t *log; + uint64_t *dhist_arena; /* nslots * (nbuckets+1) u64 */ + uint64_t *uhist_arena; + + /* Global logtail buffer. */ + u_char *logtail_buf; + u_char *logtail_pos; + u_char *logtail_end; + ngx_event_t logtail_flush_ev; + ngx_socket_t logtail_udp_fd; /* per-worker UDP socket, or -1 */ +} ngx_http_ipng_stats_worker_t; + + +/* Forward decls. 
*/ + +static ngx_int_t ngx_http_ipng_stats_preconfig(ngx_conf_t *cf); +static ngx_int_t ngx_http_ipng_stats_postconfig(ngx_conf_t *cf); + +static void *ngx_http_ipng_stats_create_main_conf(ngx_conf_t *cf); +static char *ngx_http_ipng_stats_init_main_conf(ngx_conf_t *cf, void *conf); +static void *ngx_http_ipng_stats_create_loc_conf(ngx_conf_t *cf); +static char *ngx_http_ipng_stats_merge_loc_conf(ngx_conf_t *cf, void *parent, + void *child); + +static char *ngx_http_ipng_stats_zone(ngx_conf_t *cf, ngx_command_t *cmd, + void *conf); +static char *ngx_http_ipng_stats_buckets(ngx_conf_t *cf, ngx_command_t *cmd, + void *conf); +static char *ngx_http_ipng_stats_scrape(ngx_conf_t *cf, ngx_command_t *cmd, + void *conf); +static char *ngx_http_ipng_stats_logtail(ngx_conf_t *cf, ngx_command_t *cmd, + void *conf); + +static void ngx_http_ipng_stats_logtail_flush_handler(ngx_event_t *ev); +static void ngx_http_ipng_stats_logtail_flush(ngx_http_ipng_stats_worker_t *w, + ngx_http_ipng_stats_main_conf_t *imcf); +static void ngx_http_ipng_stats_logtail_write(ngx_http_request_t *r, + ngx_http_ipng_stats_main_conf_t *imcf, + ngx_http_ipng_stats_worker_t *w); + +static char *ngx_http_ipng_stats_listen_wrapper(ngx_conf_t *cf, + ngx_command_t *cmd, void *conf); + +static ngx_int_t ngx_http_ipng_stats_init_zone(ngx_shm_zone_t *shm_zone, + void *data); +static ngx_int_t ngx_http_ipng_stats_init_module(ngx_cycle_t *cycle); +static ngx_int_t ngx_http_ipng_stats_init_worker(ngx_cycle_t *cycle); +static void ngx_http_ipng_stats_exit_worker(ngx_cycle_t *cycle); + +static ngx_int_t ngx_http_ipng_stats_log_handler(ngx_http_request_t *r); +static ngx_int_t ngx_http_ipng_stats_content_handler(ngx_http_request_t *r); + +static void ngx_http_ipng_stats_flush_timer(ngx_event_t *ev); +static void ngx_http_ipng_stats_do_flush(ngx_http_ipng_stats_worker_t *w, + ngx_http_ipng_stats_main_conf_t *imcf, ngx_log_t *log); + +static ngx_uint_t ngx_http_ipng_stats_bucket_index(ngx_uint_t ms, + ngx_uint_t *bounds, ngx_uint_t nbuckets); +static ngx_uint_t ngx_http_ipng_stats_status_index(ngx_uint_t code); + +static ngx_int_t ngx_http_ipng_stats_resolve_source( + ngx_http_request_t *r, ngx_http_ipng_stats_main_conf_t *imcf, + ngx_str_t *source_out); +static ngx_int_t ngx_http_ipng_stats_canonical_vip(ngx_http_request_t *r, + u_char *buf, size_t buflen, ngx_str_t *vip_out); + +static ngx_int_t ngx_http_ipng_stats_intern_shared( + ngx_http_ipng_stats_shctx_t *sh, ngx_slab_pool_t *slab, + ngx_http_ipng_stats_intern_t *t, ngx_str_t *s, ngx_uint_t *idx_out); + +static ngx_int_t ngx_http_ipng_stats_source_variable(ngx_http_request_t *r, + ngx_http_variable_value_t *v, uintptr_t data); +static ngx_int_t ngx_http_ipng_stats_render_prom(ngx_http_request_t *r, + ngx_http_ipng_stats_main_conf_t *imcf, + ngx_str_t *filter_source, ngx_str_t *filter_vip); +static ngx_int_t ngx_http_ipng_stats_render_json(ngx_http_request_t *r, + ngx_http_ipng_stats_main_conf_t *imcf, + ngx_str_t *filter_source, ngx_str_t *filter_vip); + + +/* Module-wide globals. */ + +static char *(*ngx_http_core_listen_orig)(ngx_conf_t *cf, + ngx_command_t *cmd, void *conf) = NULL; + +static ngx_http_ipng_stats_worker_t ngx_http_ipng_stats_worker; + +extern ngx_module_t ngx_http_core_module; + + +/* Directives. 
*/ + +static ngx_command_t ngx_http_ipng_stats_commands[] = { + + { ngx_string("ipng_stats_zone"), + NGX_HTTP_MAIN_CONF|NGX_CONF_TAKE1, + ngx_http_ipng_stats_zone, + NGX_HTTP_MAIN_CONF_OFFSET, + 0, + NULL }, + + { ngx_string("ipng_stats_flush_interval"), + NGX_HTTP_MAIN_CONF|NGX_CONF_TAKE1, + ngx_conf_set_msec_slot, + NGX_HTTP_MAIN_CONF_OFFSET, + offsetof(ngx_http_ipng_stats_main_conf_t, flush_interval), + NULL }, + + { ngx_string("ipng_stats_default_source"), + NGX_HTTP_MAIN_CONF|NGX_CONF_TAKE1, + ngx_conf_set_str_slot, + NGX_HTTP_MAIN_CONF_OFFSET, + offsetof(ngx_http_ipng_stats_main_conf_t, default_source), + NULL }, + + { ngx_string("ipng_stats_buckets"), + NGX_HTTP_MAIN_CONF|NGX_CONF_1MORE, + ngx_http_ipng_stats_buckets, + NGX_HTTP_MAIN_CONF_OFFSET, + 0, + NULL }, + + { ngx_string("ipng_stats"), + NGX_HTTP_MAIN_CONF|NGX_HTTP_SRV_CONF|NGX_HTTP_LOC_CONF + |NGX_CONF_NOARGS|NGX_CONF_TAKE1, + ngx_http_ipng_stats_scrape, + NGX_HTTP_LOC_CONF_OFFSET, + offsetof(ngx_http_ipng_stats_loc_conf_t, enabled), + NULL }, + + { ngx_string("ipng_stats_logtail"), + NGX_HTTP_MAIN_CONF|NGX_CONF_2MORE, + ngx_http_ipng_stats_logtail, + NGX_HTTP_MAIN_CONF_OFFSET, + 0, + NULL }, + + ngx_null_command +}; + + +static ngx_http_module_t ngx_http_ipng_stats_module_ctx = { + ngx_http_ipng_stats_preconfig, /* preconfiguration */ + ngx_http_ipng_stats_postconfig, /* postconfiguration */ + + ngx_http_ipng_stats_create_main_conf, /* create main configuration */ + ngx_http_ipng_stats_init_main_conf, /* init main configuration */ + + NULL, /* create server configuration */ + NULL, /* merge server configuration */ + + ngx_http_ipng_stats_create_loc_conf, /* create location configuration */ + ngx_http_ipng_stats_merge_loc_conf /* merge location configuration */ +}; + + +ngx_module_t ngx_http_ipng_stats_module = { + NGX_MODULE_V1, + &ngx_http_ipng_stats_module_ctx, /* module context */ + ngx_http_ipng_stats_commands, /* module directives */ + NGX_HTTP_MODULE, /* module type */ + NULL, /* init master */ + ngx_http_ipng_stats_init_module, /* init module */ + ngx_http_ipng_stats_init_worker, /* init process */ + NULL, /* init thread */ + NULL, /* exit thread */ + ngx_http_ipng_stats_exit_worker, /* exit process */ + NULL, /* exit master */ + NGX_MODULE_V1_PADDING +}; + + +/* ----------------------------------------------------------------- */ +/* Preconfig: replace ngx_http_core_module's `listen` handler. */ +/* ----------------------------------------------------------------- */ + +static ngx_int_t +ngx_http_ipng_stats_preconfig(ngx_conf_t *cf) +{ + ngx_command_t *cmd; + + /* ngx_http_core_commands is not const in the nginx tree, so we can + * rebind its `set` function pointer here. We only do this once per + * process, the first time preconfig runs; subsequent reloads reuse + * the same wrapper without re-saving the original. */ + + if (ngx_http_core_listen_orig != NULL) { + return NGX_OK; + } + + for (cmd = ngx_http_core_module.commands; cmd->name.len != 0; cmd++) { + if (cmd->name.len == sizeof("listen") - 1 + && ngx_strncmp(cmd->name.data, "listen", 6) == 0) + { + ngx_http_core_listen_orig = cmd->set; + cmd->set = ngx_http_ipng_stats_listen_wrapper; + return NGX_OK; + } + } + + ngx_log_error(NGX_LOG_EMERG, cf->log, 0, + "ipng_stats: could not locate ngx_http_core_module " + "\"listen\" directive to wrap"); + return NGX_ERROR; +} + + +/* The wrapper extracts device= and ipng_source_tag= from cf->args, compacting + * the array in place, then calls the original ngx_http_core_module + * listen handler. 
After a successful call it records a binding in + * imcf->bindings using the last listen_opt from the current core srv + * conf, to be resolved to an ngx_listening_t* at init_module time. */ + +static char * +ngx_http_ipng_stats_listen_wrapper(ngx_conf_t *cf, ngx_command_t *cmd, + void *conf) +{ + ngx_str_t *value; + ngx_str_t device, source; + ngx_uint_t i, j; + char *rv; + ngx_http_ipng_stats_main_conf_t *imcf; + ngx_http_ipng_stats_binding_t *b; + ngx_url_t u; + + ngx_str_null(&device); + ngx_str_null(&source); + + value = cf->args->elts; + + i = 1; + while (i < cf->args->nelts) { + if (value[i].len > 7 + && ngx_strncmp(value[i].data, "device=", 7) == 0) + { + device.data = value[i].data + 7; + device.len = value[i].len - 7; + for (j = i; j + 1 < cf->args->nelts; j++) { + value[j] = value[j + 1]; + } + cf->args->nelts--; + continue; + } + if (value[i].len > 16 + && ngx_strncmp(value[i].data, "ipng_source_tag=", 16) == 0) + { + source.data = value[i].data + 16; + source.len = value[i].len - 16; + for (j = i; j + 1 < cf->args->nelts; j++) { + value[j] = value[j + 1]; + } + cf->args->nelts--; + continue; + } + i++; + } + + if (device.len > 0 || source.len > 0) { + /* Force nginx to create a dedicated listening socket for this + * address even when a wildcard on the same port already exists. + * Without `bind`, nginx's optimizer eliminates specific-address + * sockets that are covered by a wildcard, which would prevent us + * from applying SO_BINDTODEVICE and tagging traffic per device. */ + ngx_str_t *bind_arg = ngx_array_push(cf->args); + if (bind_arg == NULL) { + return NGX_CONF_ERROR; + } + ngx_str_set(bind_arg, "bind"); + } + + rv = ngx_http_core_listen_orig(cf, cmd, conf); + if (rv != NGX_CONF_OK) { + return rv; + } + + if (device.len == 0 && source.len == 0) { + return NGX_CONF_OK; + } + + if (cf->args->nelts < 2) { + return NGX_CONF_OK; + } + + /* Listen options are not stored on the core srv conf in a way we + * can cheaply recover after the original handler runs (the core + * `listen` field is a 1-bit flag). Instead we reparse the address + * argument ourselves so we know which sockaddr to match against + * cycle->listening[] at init_module time. */ + ngx_memzero(&u, sizeof(ngx_url_t)); + u.url = value[1]; + u.listen = 1; + u.default_port = 80; + + if (ngx_parse_url(cf->pool, &u) != NGX_OK || u.naddrs == 0) { + /* The original handler already accepted this address, so a + * reparse failure would be surprising. Skip binding rather + * than fail the reload. */ + return NGX_CONF_OK; + } + + imcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_ipng_stats_module); + + if (imcf->bindings == NULL) { + imcf->bindings = ngx_array_create(cf->pool, 8, + sizeof(ngx_http_ipng_stats_binding_t)); + if (imcf->bindings == NULL) { + return NGX_CONF_ERROR; + } + } + + /* Record one binding per resolved address (ngx_parse_url may yield + * multiple for a hostname; listen specs use literal addresses so + * naddrs is almost always 1). 
*/ + for (i = 0; i < u.naddrs; i++) { + b = ngx_array_push(imcf->bindings); + if (b == NULL) { + return NGX_CONF_ERROR; + } + ngx_memzero(b, sizeof(*b)); + + if (device.len > 0) { + b->device.data = ngx_pnalloc(cf->pool, device.len); + if (b->device.data == NULL) { + return NGX_CONF_ERROR; + } + ngx_memcpy(b->device.data, device.data, device.len); + b->device.len = device.len; + } + if (source.len > 0) { + b->source.data = ngx_pnalloc(cf->pool, source.len); + if (b->source.data == NULL) { + return NGX_CONF_ERROR; + } + ngx_memcpy(b->source.data, source.data, source.len); + b->source.len = source.len; + } else if (device.len > 0) { + /* FR-1.4: default source = device name. */ + b->source = b->device; + } + + b->socklen = u.addrs[i].socklen; + ngx_memcpy(&b->sockaddr, u.addrs[i].sockaddr, u.addrs[i].socklen); + } + + return NGX_CONF_OK; +} + + +/* ----------------------------------------------------------------- */ +/* Config create/merge/init */ +/* ----------------------------------------------------------------- */ + +static void * +ngx_http_ipng_stats_create_main_conf(ngx_conf_t *cf) +{ + ngx_http_ipng_stats_main_conf_t *imcf; + + imcf = ngx_pcalloc(cf->pool, sizeof(*imcf)); + if (imcf == NULL) { + return NULL; + } + + imcf->flush_interval = NGX_CONF_UNSET_MSEC; + imcf->nbuckets = 0; + imcf->bucket_bounds_ms = NULL; + imcf->enabled = NGX_CONF_UNSET; + + return imcf; +} + + +static char * +ngx_http_ipng_stats_init_main_conf(ngx_conf_t *cf, void *conf) +{ + ngx_http_ipng_stats_main_conf_t *imcf = conf; + static const ngx_uint_t default_bounds[] = NGX_HTTP_IPNG_STATS_DEFAULT_BUCKETS; + ngx_uint_t i; + + ngx_conf_init_msec_value(imcf->flush_interval, 1000); + if (imcf->flush_interval < 100) { + ngx_log_error(NGX_LOG_EMERG, cf->log, 0, + "ipng_stats_flush_interval must be at least 100ms"); + return NGX_CONF_ERROR; + } + + if (imcf->default_source.len == 0) { + ngx_str_set(&imcf->default_source, "direct"); + } + + if (imcf->nbuckets == 0) { + imcf->nbuckets = NGX_HTTP_IPNG_STATS_DEFAULT_BUCKET_COUNT; + imcf->bucket_bounds_ms = ngx_palloc(cf->pool, + imcf->nbuckets * sizeof(ngx_uint_t)); + if (imcf->bucket_bounds_ms == NULL) { + return NGX_CONF_ERROR; + } + for (i = 0; i < imcf->nbuckets; i++) { + imcf->bucket_bounds_ms[i] = default_bounds[i]; + } + } + + if (imcf->enabled == NGX_CONF_UNSET) { + imcf->enabled = 1; + } + + /* logtail_fmt, logtail_udp_addr, logtail_buf_size, logtail_flush + * are set by ipng_stats_logtail if the directive is present; they + * default to NULL/0 from pcalloc. */ + + return NGX_CONF_OK; +} + + +static void * +ngx_http_ipng_stats_create_loc_conf(ngx_conf_t *cf) +{ + ngx_http_ipng_stats_loc_conf_t *ilcf; + + ilcf = ngx_pcalloc(cf->pool, sizeof(*ilcf)); + if (ilcf == NULL) { + return NULL; + } + ilcf->enabled = NGX_CONF_UNSET; + ilcf->is_scrape_handler = NGX_CONF_UNSET; + return ilcf; +} + + +static char * +ngx_http_ipng_stats_merge_loc_conf(ngx_conf_t *cf, void *parent, void *child) +{ + ngx_http_ipng_stats_loc_conf_t *prev = parent; + ngx_http_ipng_stats_loc_conf_t *conf = child; + + ngx_conf_merge_value(conf->enabled, prev->enabled, 1); + ngx_conf_merge_value(conf->is_scrape_handler, prev->is_scrape_handler, 0); + + /* Scrape-handler locations never count themselves (FR-5.5). 
*/ + if (conf->is_scrape_handler) { + conf->enabled = 0; + } + + return NGX_CONF_OK; +} + + +/* ipng_stats_zone name:size */ +static char * +ngx_http_ipng_stats_zone(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) +{ + ngx_http_ipng_stats_main_conf_t *imcf = conf; + ngx_str_t *value; + u_char *p; + ssize_t size; + ngx_str_t name; + + value = cf->args->elts; + + p = (u_char *) ngx_strchr(value[1].data, ':'); + if (p == NULL) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "invalid ipng_stats_zone \"%V\"; expected name:size", + &value[1]); + return NGX_CONF_ERROR; + } + + name.data = value[1].data; + name.len = p - value[1].data; + + p++; + size = ngx_parse_size(&(ngx_str_t){ .data = p, + .len = value[1].len - (p - value[1].data) }); + if (size == NGX_ERROR || size < (ssize_t) (64 * 1024)) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "ipng_stats_zone \"%V\" size is too small", + &value[1]); + return NGX_CONF_ERROR; + } + + imcf->zone_name = name; + imcf->zone_size = size; + + imcf->shm_zone = ngx_shared_memory_add(cf, &name, size, + &ngx_http_ipng_stats_module); + if (imcf->shm_zone == NULL) { + return NGX_CONF_ERROR; + } + imcf->shm_zone->init = ngx_http_ipng_stats_init_zone; + imcf->shm_zone->data = imcf; + + return NGX_CONF_OK; +} + + +/* ipng_stats_buckets ... */ +static char * +ngx_http_ipng_stats_buckets(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) +{ + ngx_http_ipng_stats_main_conf_t *imcf = conf; + ngx_str_t *value; + ngx_uint_t i, prev, n; + + value = cf->args->elts; + n = cf->args->nelts - 1; + + if (n < 1) { + return "requires at least one bucket boundary"; + } + + imcf->bucket_bounds_ms = ngx_palloc(cf->pool, n * sizeof(ngx_uint_t)); + if (imcf->bucket_bounds_ms == NULL) { + return NGX_CONF_ERROR; + } + imcf->nbuckets = n; + + prev = 0; + for (i = 0; i < n; i++) { + ngx_int_t v = ngx_atoi(value[i + 1].data, value[i + 1].len); + if (v == NGX_ERROR || v <= 0 || (ngx_uint_t) v <= prev) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "ipng_stats_buckets values must be strictly increasing positive " + "integers; got \"%V\"", &value[i + 1]); + return NGX_CONF_ERROR; + } + imcf->bucket_bounds_ms[i] = (ngx_uint_t) v; + prev = (ngx_uint_t) v; + } + return NGX_CONF_OK; +} + + +static char * +ngx_http_ipng_stats_scrape(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) +{ + ngx_http_ipng_stats_loc_conf_t *ilcf = conf; + ngx_http_core_loc_conf_t *clcf; + ngx_str_t *value; + + /* Argument-less form inside a location block turns that location + * into the scrape handler. */ + if (cf->args->nelts == 1) { + clcf = ngx_http_conf_get_module_loc_conf(cf, ngx_http_core_module); + clcf->handler = ngx_http_ipng_stats_content_handler; + ilcf->is_scrape_handler = 1; + ilcf->enabled = 0; + return NGX_CONF_OK; + } + + /* With an on/off argument, toggles counting in the current context. */ + value = cf->args->elts; + if (value[1].len == 2 && ngx_strncmp(value[1].data, "on", 2) == 0) { + ilcf->enabled = 1; + return NGX_CONF_OK; + } + if (value[1].len == 3 && ngx_strncmp(value[1].data, "off", 3) == 0) { + ilcf->enabled = 0; + return NGX_CONF_OK; + } + + return "expects \"on\", \"off\", or no argument"; +} + + +/* ipng_stats_logtail udp://host:port [buffer=] [flush=] + * + * Enables a global access log that fires for every request via our + * log-phase handler, using a compiled log_format looked up by name + * from ngx_http_log_module. Per-worker buffered UDP datagrams. 
*/ + +static char * +ngx_http_ipng_stats_logtail(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) +{ + ngx_http_ipng_stats_main_conf_t *imcf = conf; + ipng_log_main_conf_t *lmcf; + ipng_log_fmt_t *fmt; + ngx_str_t *value; + ngx_uint_t i; + ssize_t buf_size; + ngx_msec_t flush_ms; + + if (imcf->logtail_fmt != NULL) { + return "is duplicate"; + } + + value = cf->args->elts; + + /* Look up the named log_format in ngx_http_log_module. */ + lmcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_log_module); + if (lmcf == NULL) { + return "ngx_http_log_module is not available"; + } + + fmt = lmcf->formats.elts; + imcf->logtail_fmt = NULL; + for (i = 0; i < lmcf->formats.nelts; i++) { + if (fmt[i].name.len == value[1].len + && ngx_strncmp(fmt[i].name.data, value[1].data, value[1].len) == 0) + { + imcf->logtail_fmt = &fmt[i]; + break; + } + } + if (imcf->logtail_fmt == NULL) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "ipng_stats_logtail: unknown log_format \"%V\"", + &value[1]); + return NGX_CONF_ERROR; + } + + /* Destination: udp://host:port (only UDP is supported). */ + if (value[2].len <= 6 + || ngx_strncmp(value[2].data, "udp://", 6) != 0) + { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "ipng_stats_logtail: destination must be udp://host:port, " + "got \"%V\"", &value[2]); + return NGX_CONF_ERROR; + } + + { + /* Parse udp://host:port */ + u_char *colon; + ngx_str_t host_str, port_str; + ngx_int_t port; + struct in_addr addr; + + host_str.data = value[2].data + 6; + host_str.len = value[2].len - 6; + colon = ngx_strlchr(host_str.data, host_str.data + host_str.len, ':'); + if (colon == NULL) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "ipng_stats_logtail: udp:// requires host:port, got \"%V\"", + &value[2]); + return NGX_CONF_ERROR; + } + port_str.data = colon + 1; + port_str.len = host_str.len - (size_t)(colon + 1 - host_str.data); + host_str.len = (size_t)(colon - host_str.data); + + port = ngx_atoi(port_str.data, port_str.len); + if (port < 1 || port > 65535) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "ipng_stats_logtail: invalid UDP port in \"%V\"", &value[2]); + return NGX_CONF_ERROR; + } + + /* Resolve host — only literal IPv4 for simplicity. */ + u_char tmp[16]; + if (host_str.len >= sizeof(tmp)) { + return "udp:// host too long"; + } + ngx_memcpy(tmp, host_str.data, host_str.len); + tmp[host_str.len] = '\0'; + if (inet_aton((char *) tmp, &addr) == 0) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "ipng_stats_logtail: invalid IPv4 address in \"%V\"", + &value[2]); + return NGX_CONF_ERROR; + } + + ngx_memzero(&imcf->logtail_udp_addr, sizeof(struct sockaddr_in)); + imcf->logtail_udp_addr.sin_family = AF_INET; + imcf->logtail_udp_addr.sin_port = htons((in_port_t) port); + imcf->logtail_udp_addr.sin_addr = addr; + } + + /* Defaults. */ + buf_size = 64 * 1024; + flush_ms = 1000; + + /* Parse optional key=value parameters. 
*/ + for (i = 3; i < cf->args->nelts; i++) { + if (value[i].len > 7 + && ngx_strncmp(value[i].data, "buffer=", 7) == 0) + { + ngx_str_t s = { value[i].len - 7, value[i].data + 7 }; + buf_size = ngx_parse_size(&s); + if (buf_size == NGX_ERROR || buf_size < 1024) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "ipng_stats_logtail: invalid buffer size \"%V\"", + &value[i]); + return NGX_CONF_ERROR; + } + continue; + } + if (value[i].len > 6 + && ngx_strncmp(value[i].data, "flush=", 6) == 0) + { + ngx_str_t s = { value[i].len - 6, value[i].data + 6 }; + flush_ms = ngx_parse_time(&s, 0); + if (flush_ms == (ngx_msec_t) NGX_ERROR || flush_ms < 100) { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "ipng_stats_logtail: invalid flush interval \"%V\"", + &value[i]); + return NGX_CONF_ERROR; + } + continue; + } + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "ipng_stats_logtail: unknown parameter \"%V\"", &value[i]); + return NGX_CONF_ERROR; + } + + imcf->logtail_buf_size = (size_t) buf_size; + imcf->logtail_flush = flush_ms; + + return NGX_CONF_OK; +} + + +/* ----------------------------------------------------------------- */ +/* Postconfig: install log-phase handler */ +/* ----------------------------------------------------------------- */ + +static ngx_int_t +ngx_http_ipng_stats_postconfig(ngx_conf_t *cf) +{ + ngx_http_handler_pt *h; + ngx_http_core_main_conf_t *cmcf; + ngx_http_ipng_stats_main_conf_t *imcf; + ngx_http_variable_t *var; + ngx_str_t v_source = ngx_string("ipng_source_tag"); + + /* Register $ipng_source_tag unconditionally — it's useful in + * log_format, map, add_header, etc. even when ipng_stats_zone + * isn't configured. For the VIP address, operators use nginx's + * built-in $server_addr, which is functionally identical. */ + var = ngx_http_add_variable(cf, &v_source, NGX_HTTP_VAR_NOCACHEABLE); + if (var == NULL) { + return NGX_ERROR; + } + var->get_handler = ngx_http_ipng_stats_source_variable; + + imcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_ipng_stats_module); + + /* Loading the module into a config that doesn't use it must be a + * no-op — operators may install the Debian package on hosts where + * they haven't yet added `ipng_stats_zone`. In that case we simply + * skip installing the log handler and sit idle. */ + if (imcf->shm_zone == NULL) { + return NGX_OK; + } + + cmcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_core_module); + + h = ngx_array_push(&cmcf->phases[NGX_HTTP_LOG_PHASE].handlers); + if (h == NULL) { + return NGX_ERROR; + } + *h = ngx_http_ipng_stats_log_handler; + + return NGX_OK; +} + + +/* ----------------------------------------------------------------- */ +/* Shared-memory zone init */ +/* ----------------------------------------------------------------- */ + +static void +ngx_http_ipng_stats_rbtree_insert(ngx_rbtree_node_t *temp, + ngx_rbtree_node_t *node, ngx_rbtree_node_t *sentinel) +{ + /* Plain hash-keyed rbtree insertion; duplicate keys (hash + * collisions) are resolved by walking sibling chains at lookup + * time. 
*/ + for ( ;; ) { + if (node->key < temp->key) { + if (temp->left == sentinel) { + temp->left = node; + break; + } + temp = temp->left; + } else { + if (temp->right == sentinel) { + temp->right = node; + break; + } + temp = temp->right; + } + } + node->parent = temp; + node->left = sentinel; + node->right = sentinel; + ngx_rbt_red(node); +} + + +static ngx_int_t +ngx_http_ipng_stats_init_zone(ngx_shm_zone_t *shm_zone, void *data) +{ + ngx_http_ipng_stats_main_conf_t *imcf = shm_zone->data; + ngx_slab_pool_t *slab; + ngx_http_ipng_stats_shctx_t *sh; + + slab = (ngx_slab_pool_t *) shm_zone->shm.addr; + + /* On reload, nginx may or may not set shm.exists depending on + * dynamic-module lifecycle subtleties. As a belt-and-suspenders + * check, also look at slab->data: if our shctx is already pinned + * there from a previous cycle, the zone is reusable regardless + * of what shm.exists says. */ + sh = slab->data; + if (sh != NULL && sh->magic == 0x49504E47u /* "IPNG" */) { + ngx_log_error(NGX_LOG_NOTICE, shm_zone->shm.log, 0, + "ipng_stats: init_zone: reusing existing zone " + "(sources=%ui, vips=%ui)", + sh->sources.nelts, sh->vips.nelts); + shm_zone->data = sh; + return NGX_OK; + } + + sh = ngx_slab_alloc(slab, sizeof(*sh)); + if (sh == NULL) { + return NGX_ERROR; + } + ngx_memzero(sh, sizeof(*sh)); + sh->magic = 0x49504E47u; /* "IPNG" */ + slab->data = sh; + + ngx_rbtree_init(&sh->rbtree, &sh->sentinel, + ngx_http_ipng_stats_rbtree_insert); + ngx_queue_init(&sh->lru); + + sh->sources.nalloc = 16; + sh->sources.entries = ngx_slab_alloc(slab, + sh->sources.nalloc * sizeof(ngx_str_t)); + if (sh->sources.entries == NULL) { + return NGX_ERROR; + } + sh->sources.nelts = 0; + + sh->vips.nalloc = 64; + sh->vips.entries = ngx_slab_alloc(slab, + sh->vips.nalloc * sizeof(ngx_str_t)); + if (sh->vips.entries == NULL) { + return NGX_ERROR; + } + sh->vips.nelts = 0; + + imcf->shm_zone->data = sh; + + return NGX_OK; +} + + +/* ----------------------------------------------------------------- */ +/* init_module: apply SO_BINDTODEVICE to the opened listen sockets */ +/* ----------------------------------------------------------------- */ + +static ngx_int_t +ngx_http_ipng_stats_init_module(ngx_cycle_t *cycle) +{ + ngx_http_ipng_stats_main_conf_t *imcf; + ngx_http_ipng_stats_binding_t *bindings; + ngx_listening_t *ls; + ngx_uint_t i, j; + char devname[IFNAMSIZ]; + size_t dlen; + + imcf = ngx_http_cycle_get_module_main_conf(cycle, + ngx_http_ipng_stats_module); + if (imcf == NULL) { + ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, + "ipng_stats: init_module: imcf is NULL"); + return NGX_OK; + } + if (imcf->bindings == NULL) { + ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, + "ipng_stats: init_module: no bindings (no device= " + "or ipng_source_tag= on any listen)"); + return NGX_OK; + } + + bindings = imcf->bindings->elts; + ls = cycle->listening.elts; + + ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, + "ipng_stats: init_module: %ui bindings, %ui listeners", + imcf->bindings->nelts, cycle->listening.nelts); + + for (i = 0; i < cycle->listening.nelts; i++) { + ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, + "ipng_stats: listener[%ui]: fd=%d addr=%V socklen=%d", + i, (int) ls[i].fd, &ls[i].addr_text, + (int) ls[i].socklen); + } + + for (j = 0; j < imcf->bindings->nelts; j++) { + ngx_int_t matched = 0; + + for (i = 0; i < cycle->listening.nelts; i++) { + if (ls[i].socklen != bindings[j].socklen) { + continue; + } + if (ngx_cmp_sockaddr(ls[i].sockaddr, ls[i].socklen, + (struct sockaddr *) &bindings[j].sockaddr, + 
bindings[j].socklen, 1) != NGX_OK) + { + continue; + } + matched = 1; + bindings[j].listening = &ls[i]; + + if (bindings[j].device.len > 0 && ls[i].fd != (ngx_socket_t) -1) { + dlen = bindings[j].device.len < IFNAMSIZ - 1 + ? bindings[j].device.len : IFNAMSIZ - 1; + ngx_memcpy(devname, bindings[j].device.data, dlen); + devname[dlen] = '\0'; + + if (setsockopt(ls[i].fd, SOL_SOCKET, SO_BINDTODEVICE, + devname, (socklen_t) (dlen + 1)) == -1) + { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "ipng_stats: setsockopt(SO_BINDTODEVICE, " + "\"%s\") failed for listen fd %d", + devname, (int) ls[i].fd); + return NGX_ERROR; + } + ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, + "ipng_stats: bound listen fd %d to device " + "\"%s\" (source=\"%V\")", + (int) ls[i].fd, devname, &bindings[j].source); + } + break; + } + if (!matched) { + u_char buf[NGX_SOCKADDR_STRLEN]; + size_t len; + len = ngx_sock_ntop((struct sockaddr *) &bindings[j].sockaddr, + bindings[j].socklen, buf, sizeof(buf), 1); + ngx_log_error(NGX_LOG_WARN, cycle->log, 0, + "ipng_stats: no listener matched binding " + "source=\"%V\" addr=%*s socklen=%d", + &bindings[j].source, len, buf, + (int) bindings[j].socklen); + } + } + + return NGX_OK; +} + + +/* ----------------------------------------------------------------- */ +/* Worker init/exit and flush timer */ +/* ----------------------------------------------------------------- */ + +static ngx_int_t +ngx_http_ipng_stats_init_worker(ngx_cycle_t *cycle) +{ + ngx_http_ipng_stats_main_conf_t *imcf; + ngx_http_ipng_stats_worker_t *w = &ngx_http_ipng_stats_worker; + size_t arena_bytes; + + if (ngx_process != NGX_PROCESS_WORKER + && ngx_process != NGX_PROCESS_SINGLE) + { + return NGX_OK; + } + + imcf = ngx_http_cycle_get_module_main_conf(cycle, + ngx_http_ipng_stats_module); + if (imcf == NULL || imcf->shm_zone == NULL) { + return NGX_OK; + } + + w->log = cycle->log; + w->nalloc = 256; + w->nslots = 0; + w->dirty_head = NULL; + w->slots = ngx_pcalloc(cycle->pool, + w->nalloc * sizeof(ngx_http_ipng_stats_slot_t)); + if (w->slots == NULL) { + return NGX_ERROR; + } + + arena_bytes = w->nalloc * (imcf->nbuckets + 1) * sizeof(uint64_t); + w->dhist_arena = ngx_pcalloc(cycle->pool, arena_bytes); + w->uhist_arena = ngx_pcalloc(cycle->pool, arena_bytes); + if (w->dhist_arena == NULL || w->uhist_arena == NULL) { + return NGX_ERROR; + } + + /* Schedule first flush tick. */ + ngx_memzero(&w->flush_ev, sizeof(w->flush_ev)); + w->flush_ev.handler = ngx_http_ipng_stats_flush_timer; + w->flush_ev.log = cycle->log; + w->flush_ev.data = w; + w->flush_ev.cancelable = 1; + + ngx_add_timer(&w->flush_ev, imcf->flush_interval); + + /* Logtail buffer + flush timer (UDP only). */ + w->logtail_udp_fd = (ngx_socket_t) -1; + + if (imcf->logtail_fmt != NULL) { + w->logtail_buf = ngx_palloc(cycle->pool, imcf->logtail_buf_size); + if (w->logtail_buf == NULL) { + return NGX_ERROR; + } + w->logtail_pos = w->logtail_buf; + w->logtail_end = w->logtail_buf + imcf->logtail_buf_size; + + /* Open per-worker UDP socket. 
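+         * The socket stays unconnected: each flush calls sendto() with the
+         * destination parsed from ipng_stats_logtail, and if nobody is
+         * listening the kernel simply drops the datagrams (fire-and-forget).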
*/ + w->logtail_udp_fd = socket(AF_INET, SOCK_DGRAM, 0); + if (w->logtail_udp_fd == (ngx_socket_t) -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "ipng_stats: socket(SOCK_DGRAM) failed"); + return NGX_ERROR; + } + + ngx_memzero(&w->logtail_flush_ev, sizeof(w->logtail_flush_ev)); + w->logtail_flush_ev.handler = ngx_http_ipng_stats_logtail_flush_handler; + w->logtail_flush_ev.log = cycle->log; + w->logtail_flush_ev.data = w; + w->logtail_flush_ev.cancelable = 1; + + ngx_add_timer(&w->logtail_flush_ev, imcf->logtail_flush); + } + + return NGX_OK; +} + + +static void +ngx_http_ipng_stats_exit_worker(ngx_cycle_t *cycle) +{ + ngx_http_ipng_stats_worker_t *w = &ngx_http_ipng_stats_worker; + ngx_http_ipng_stats_main_conf_t *imcf; + + if (w->flush_ev.timer_set) { + ngx_del_timer(&w->flush_ev); + } + + /* Flush logtail buffer before the worker exits. */ + imcf = ngx_http_cycle_get_module_main_conf(cycle, + ngx_http_ipng_stats_module); + if (imcf != NULL) { + ngx_http_ipng_stats_logtail_flush(w, imcf); + } + if (w->logtail_flush_ev.timer_set) { + ngx_del_timer(&w->logtail_flush_ev); + } + if (w->logtail_udp_fd != (ngx_socket_t) -1) { + close(w->logtail_udp_fd); + w->logtail_udp_fd = (ngx_socket_t) -1; + } +} + + +static void +ngx_http_ipng_stats_flush_timer(ngx_event_t *ev) +{ + ngx_http_ipng_stats_worker_t *w = ev->data; + ngx_http_ipng_stats_main_conf_t *imcf; + + imcf = ngx_http_cycle_get_module_main_conf(ngx_cycle, + ngx_http_ipng_stats_module); + if (imcf == NULL) { + return; + } + + ngx_http_ipng_stats_do_flush(w, imcf, ev->log); + + if (!ngx_exiting && !ngx_quit) { + ngx_add_timer(&w->flush_ev, imcf->flush_interval); + } +} + + +static void +ngx_http_ipng_stats_do_flush(ngx_http_ipng_stats_worker_t *w, + ngx_http_ipng_stats_main_conf_t *imcf, ngx_log_t *log) +{ + ngx_http_ipng_stats_shctx_t *sh; + ngx_slab_pool_t *slab; + ngx_http_ipng_stats_slot_t *slot, *next; + ngx_http_ipng_stats_node_t *n; + ngx_rbtree_node_t *rb; + ngx_uint_t i; + ngx_uint_t nbuckets; + size_t node_size; + ngx_atomic_uint_t *shared_lanes; + + if (imcf->shm_zone == NULL) { + return; + } + + slab = (ngx_slab_pool_t *) imcf->shm_zone->shm.addr; + sh = imcf->shm_zone->data; + nbuckets = imcf->nbuckets; + node_size = sizeof(ngx_http_ipng_stats_node_t) + + 2 * (nbuckets + 1) * sizeof(ngx_atomic_uint_t); + + ngx_shmtx_lock(&slab->mutex); + (void) ngx_atomic_fetch_add(&sh->flushes_total, 1); + + for (slot = w->dirty_head; slot != NULL; slot = next) { + next = slot->dirty_next; + slot->dirty_next = NULL; + slot->dirty = 0; + + /* Find or insert the shared-zone node for this hash. */ + rb = sh->rbtree.root; + n = NULL; + while (rb != &sh->sentinel) { + if (slot->hash < rb->key) { + rb = rb->left; + continue; + } + if (slot->hash > rb->key) { + rb = rb->right; + continue; + } + /* hash match: verify full key. */ + n = (ngx_http_ipng_stats_node_t *) rb; + if (n->source_id == slot->source_id + && n->vip_id == slot->vip_id + && n->code == slot->code) + { + break; + } + /* collision — walk right. */ + rb = rb->right; + n = NULL; + } + + if (n == NULL) { + n = ngx_slab_calloc_locked(slab, node_size); + if (n == NULL) { + (void) ngx_atomic_fetch_add(&sh->zone_full_events, 1); + /* Drop this slot's dirty deltas; they will reaccumulate + * on the next request if the operator resizes the zone. 
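+                 * (Resizing means raising the size in the ipng_stats_zone
+                 * directive, e.g. growing "ipng_stats_zone ipng:1m;" to a
+                 * larger value and reloading nginx.)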
*/ + slot->requests = 0; + slot->bytes_in = 0; + slot->bytes_out = 0; + slot->duration_sum_ms = 0; + slot->upstream_sum_ms = 0; + for (i = 0; i <= nbuckets; i++) { + slot->dhist[i] = 0; + slot->uhist[i] = 0; + } + continue; + } + n->rbnode.key = slot->hash; + n->source_id = slot->source_id; + n->vip_id = slot->vip_id; + n->code = slot->code; + ngx_rbtree_insert(&sh->rbtree, &n->rbnode); + ngx_queue_insert_tail(&sh->lru, &n->lru); + } + + (void) ngx_atomic_fetch_add(&n->requests, slot->requests); + (void) ngx_atomic_fetch_add(&n->bytes_in, slot->bytes_in); + (void) ngx_atomic_fetch_add(&n->bytes_out, slot->bytes_out); + (void) ngx_atomic_fetch_add(&n->duration_sum_ms, slot->duration_sum_ms); + (void) ngx_atomic_fetch_add(&n->upstream_sum_ms, slot->upstream_sum_ms); + + shared_lanes = (ngx_atomic_uint_t *) (n + 1); + for (i = 0; i <= nbuckets; i++) { + if (slot->dhist[i]) { + (void) ngx_atomic_fetch_add(&shared_lanes[i], slot->dhist[i]); + } + if (slot->uhist[i]) { + (void) ngx_atomic_fetch_add( + &shared_lanes[nbuckets + 1 + i], slot->uhist[i]); + } + } + + /* Clear local deltas. */ + slot->requests = 0; + slot->bytes_in = 0; + slot->bytes_out = 0; + slot->duration_sum_ms = 0; + slot->upstream_sum_ms = 0; + for (i = 0; i <= nbuckets; i++) { + slot->dhist[i] = 0; + slot->uhist[i] = 0; + } + + ngx_queue_remove(&n->lru); + ngx_queue_insert_tail(&sh->lru, &n->lru); + } + + w->dirty_head = NULL; + + ngx_shmtx_unlock(&slab->mutex); +} + + +/* ----------------------------------------------------------------- */ +/* Log-phase handler */ +/* ----------------------------------------------------------------- */ + +static ngx_uint_t +ngx_http_ipng_stats_status_index(ngx_uint_t code) +{ + if (code < NGX_HTTP_IPNG_STATS_CODE_MIN + || code > NGX_HTTP_IPNG_STATS_CODE_MAX) + { + return NGX_HTTP_IPNG_STATS_CODE_UNKNOWN; + } + return code; +} + + +static ngx_uint_t +ngx_http_ipng_stats_bucket_index(ngx_uint_t ms, ngx_uint_t *bounds, + ngx_uint_t nbuckets) +{ + /* Binary search for the smallest bound >= ms. Return nbuckets for + * values exceeding the final bound (the implicit +Inf bucket). */ + ngx_uint_t lo = 0, hi = nbuckets; + while (lo < hi) { + ngx_uint_t mid = (lo + hi) >> 1; + if (ms <= bounds[mid]) { + hi = mid; + } else { + lo = mid + 1; + } + } + return lo; +} + + +static ngx_int_t +ngx_http_ipng_stats_resolve_source(ngx_http_request_t *r, + ngx_http_ipng_stats_main_conf_t *imcf, ngx_str_t *source_out) +{ + ngx_http_ipng_stats_binding_t *b; + ngx_uint_t i; + + if (imcf->bindings != NULL) { + b = imcf->bindings->elts; + for (i = 0; i < imcf->bindings->nelts; i++) { + if (b[i].listening == r->connection->listening) { + *source_out = b[i].source; + return NGX_OK; + } + } + } + + *source_out = imcf->default_source; + return NGX_OK; +} + + +static ngx_int_t +ngx_http_ipng_stats_canonical_vip(ngx_http_request_t *r, u_char *buf, + size_t buflen, ngx_str_t *vip_out) +{ + size_t n; + + if (r->connection->local_sockaddr == NULL) { + return NGX_ERROR; + } + + /* ngx_sock_ntop(NGX_SOCKADDR_STRLEN) drops port and renders v6 in + * RFC 5952 form. Scope-ids are dropped as part of the format, which + * is what FR-2.5 asks for. */ + n = ngx_sock_ntop(r->connection->local_sockaddr, + r->connection->local_socklen, + buf, buflen, 0); + if (n == 0) { + return NGX_ERROR; + } + vip_out->data = buf; + vip_out->len = n; + return NGX_OK; +} + + +/* Find or allocate a per-worker slot for this key; caller fills in + * deltas. 
The per-worker table is a flat array with linear scan over + * already-used slots — fine because `nslots` is small. */ +static ngx_http_ipng_stats_slot_t * +ngx_http_ipng_stats_worker_slot(ngx_http_ipng_stats_worker_t *w, + ngx_http_ipng_stats_main_conf_t *imcf, + ngx_uint_t source_id, ngx_uint_t vip_id, ngx_uint_t code) +{ + ngx_http_ipng_stats_slot_t *s; + ngx_uint_t i, hash; + + hash = (source_id * 2654435761u) + ^ (vip_id * 40503u) + ^ (code * 131071u); + + for (i = 0; i < w->nslots; i++) { + s = &w->slots[i]; + if (s->hash == hash && s->source_id == source_id + && s->vip_id == vip_id && s->code == code) + { + return s; + } + } + + if (w->nslots >= w->nalloc) { + /* Over the per-worker cap. Drop. The flush will see zero + * dirty slots for this key and the shared zone will not + * learn of it until capacity frees up. */ + return NULL; + } + + s = &w->slots[w->nslots]; + s->hash = hash; + s->source_id = source_id; + s->vip_id = vip_id; + s->code = code; + s->dhist = &w->dhist_arena[w->nslots * (imcf->nbuckets + 1)]; + s->uhist = &w->uhist_arena[w->nslots * (imcf->nbuckets + 1)]; + w->nslots++; + + return s; +} + + +static void +ngx_http_ipng_stats_mark_dirty(ngx_http_ipng_stats_worker_t *w, + ngx_http_ipng_stats_slot_t *s) +{ + if (s->dirty) { + return; + } + s->dirty = 1; + s->dirty_next = w->dirty_head; + w->dirty_head = s; +} + + +static ngx_int_t +ngx_http_ipng_stats_log_handler(ngx_http_request_t *r) +{ + ngx_http_ipng_stats_main_conf_t *imcf; + ngx_http_ipng_stats_loc_conf_t *ilcf; + ngx_http_ipng_stats_worker_t *w = &ngx_http_ipng_stats_worker; + ngx_http_ipng_stats_shctx_t *sh; + ngx_slab_pool_t *slab; + ngx_http_ipng_stats_slot_t *slot; + ngx_str_t source, vip; + u_char vipbuf[NGX_SOCKADDR_STRLEN]; + ngx_uint_t source_id, vip_id, code; + ngx_uint_t bucket; + ngx_msec_int_t elapsed_ms; + ngx_time_t *tp; + + imcf = ngx_http_get_module_main_conf(r, ngx_http_ipng_stats_module); + if (imcf == NULL || imcf->shm_zone == NULL || !imcf->enabled) { + return NGX_OK; + } + + ilcf = ngx_http_get_module_loc_conf(r, ngx_http_ipng_stats_module); + if (ilcf == NULL || !ilcf->enabled) { + return NGX_OK; + } + + if (ngx_http_ipng_stats_resolve_source(r, imcf, &source) != NGX_OK) { + return NGX_OK; + } + + /* For wildcard listeners (0.0.0.0 / ::), local_sockaddr initially + * holds the bind address, not the real VIP. Force a getsockname() + * so we get the actual destination address the client connected to. */ + if (ngx_connection_local_sockaddr(r->connection, NULL, 0) != NGX_OK) { + return NGX_OK; + } + + if (ngx_http_ipng_stats_canonical_vip(r, vipbuf, sizeof(vipbuf), &vip) + != NGX_OK) + { + return NGX_OK; + } + + code = ngx_http_ipng_stats_status_index(r->headers_out.status); + + /* Intern source and vip in the shared zone. This is the only + * shared-zone write outside of flush — it runs rarely (once per + * new (source, vip) pair, ever) and takes the slab mutex. */ + slab = (ngx_slab_pool_t *) imcf->shm_zone->shm.addr; + sh = imcf->shm_zone->data; + + ngx_shmtx_lock(&slab->mutex); + if (ngx_http_ipng_stats_intern_shared(sh, slab, &sh->sources, &source, + &source_id) != NGX_OK + || ngx_http_ipng_stats_intern_shared(sh, slab, &sh->vips, &vip, + &vip_id) != NGX_OK) + { + ngx_shmtx_unlock(&slab->mutex); + return NGX_OK; + } + ngx_shmtx_unlock(&slab->mutex); + + slot = ngx_http_ipng_stats_worker_slot(w, imcf, source_id, vip_id, code); + if (slot == NULL) { + return NGX_OK; + } + + slot->requests += 1; + slot->bytes_in += r->request_length > 0 ? 
(uint64_t) r->request_length : 0; + slot->bytes_out += (uint64_t) r->connection->sent; + + /* Use the same formula nginx uses for $request_time: two-field + * subtraction via ngx_timeofday(), which is the canonical way to + * compute elapsed time in nginx's cached-time model. */ + tp = ngx_timeofday(); + elapsed_ms = (ngx_msec_int_t) + ((tp->sec - r->start_sec) * 1000 + (tp->msec - r->start_msec)); + if (elapsed_ms < 0) elapsed_ms = 0; + slot->duration_sum_ms += (uint64_t) elapsed_ms; + + bucket = ngx_http_ipng_stats_bucket_index((ngx_uint_t) elapsed_ms, + imcf->bucket_bounds_ms, + imcf->nbuckets); + slot->dhist[bucket] += 1; + + if (r->upstream_states != NULL && r->upstream_states->nelts > 0) { + ngx_http_upstream_state_t *us = r->upstream_states->elts; + ngx_msec_int_t up_ms = (ngx_msec_int_t) us[0].response_time; + if (up_ms > 0) { + slot->upstream_sum_ms += (uint64_t) up_ms; + bucket = ngx_http_ipng_stats_bucket_index((ngx_uint_t) up_ms, + imcf->bucket_bounds_ms, + imcf->nbuckets); + slot->uhist[bucket] += 1; + } + } + + ngx_http_ipng_stats_mark_dirty(w, slot); + + /* Global logtail — runs for every request, uses compiled log_format + * ops, writes to a per-worker buffer. */ + if (imcf->logtail_fmt != NULL) { + ngx_http_ipng_stats_logtail_write(r, imcf, w); + } + + return NGX_OK; +} + + +/* ----------------------------------------------------------------- */ +/* String interning (called under slab mutex) */ +/* ----------------------------------------------------------------- */ + +static ngx_int_t +ngx_http_ipng_stats_intern_shared(ngx_http_ipng_stats_shctx_t *sh, + ngx_slab_pool_t *slab, ngx_http_ipng_stats_intern_t *t, ngx_str_t *s, + ngx_uint_t *idx_out) +{ + ngx_uint_t i; + u_char *copy; + ngx_str_t *ne; + + for (i = 0; i < t->nelts; i++) { + if (t->entries[i].len == s->len + && ngx_memcmp(t->entries[i].data, s->data, s->len) == 0) + { + *idx_out = i; + return NGX_OK; + } + } + + if (t->nelts >= t->nalloc) { + /* Grow by 2x. */ + ngx_uint_t new_cap = t->nalloc * 2; + ngx_str_t *ne_arr = ngx_slab_alloc_locked(slab, + new_cap * sizeof(ngx_str_t)); + if (ne_arr == NULL) { + (void) ngx_atomic_fetch_add(&sh->zone_full_events, 1); + return NGX_ERROR; + } + ngx_memcpy(ne_arr, t->entries, t->nelts * sizeof(ngx_str_t)); + ngx_slab_free_locked(slab, t->entries); + t->entries = ne_arr; + t->nalloc = new_cap; + } + + copy = ngx_slab_alloc_locked(slab, s->len); + if (copy == NULL) { + (void) ngx_atomic_fetch_add(&sh->zone_full_events, 1); + return NGX_ERROR; + } + ngx_memcpy(copy, s->data, s->len); + + ne = &t->entries[t->nelts]; + ne->data = copy; + ne->len = s->len; + *idx_out = t->nelts; + t->nelts++; + + return NGX_OK; +} + + +/* ----------------------------------------------------------------- */ +/* Global logtail: write + flush */ +/* ----------------------------------------------------------------- */ + +static void +ngx_http_ipng_stats_logtail_write(ngx_http_request_t *r, + ngx_http_ipng_stats_main_conf_t *imcf, + ngx_http_ipng_stats_worker_t *w) +{ + ipng_log_op_t *ops; + ngx_uint_t i, nops; + size_t line_len; + u_char *p; + + if (imcf->logtail_fmt == NULL || w->logtail_buf == NULL) { + return; + } + + ops = imcf->logtail_fmt->ops->elts; + nops = imcf->logtail_fmt->ops->nelts; + + /* Compute line length. */ + line_len = 1; /* trailing newline */ + for (i = 0; i < nops; i++) { + if (ops[i].len == 0) { + line_len += ops[i].getlen(r, ops[i].data); + } else { + line_len += ops[i].len; + } + } + + /* Flush if the line won't fit. 
*/ + if ((size_t)(w->logtail_end - w->logtail_pos) < line_len) { + ngx_http_ipng_stats_logtail_flush(w, imcf); + } + + /* If it STILL doesn't fit (single line > buffer), write directly. */ + if ((size_t)(w->logtail_end - w->logtail_pos) < line_len) { + u_char *tmp = ngx_pnalloc(r->pool, line_len); + if (tmp == NULL) { + return; + } + p = tmp; + for (i = 0; i < nops; i++) { + p = ops[i].run(r, p, &ops[i]); + } + *p++ = '\n'; + if (w->logtail_udp_fd != (ngx_socket_t) -1) { + (void) sendto(w->logtail_udp_fd, tmp, p - tmp, 0, + (struct sockaddr *) &imcf->logtail_udp_addr, + sizeof(struct sockaddr_in)); + } + return; + } + + /* Append to buffer. */ + p = w->logtail_pos; + for (i = 0; i < nops; i++) { + p = ops[i].run(r, p, &ops[i]); + } + *p++ = '\n'; + w->logtail_pos = p; +} + + +static void +ngx_http_ipng_stats_logtail_flush(ngx_http_ipng_stats_worker_t *w, + ngx_http_ipng_stats_main_conf_t *imcf) +{ + size_t n; + + if (w->logtail_buf == NULL || w->logtail_pos == w->logtail_buf) { + return; + } + + n = w->logtail_pos - w->logtail_buf; + + /* UDP mode: fire-and-forget sendto. If nobody is listening, + * the kernel silently drops the datagram. */ + if (w->logtail_udp_fd != (ngx_socket_t) -1) { + (void) sendto(w->logtail_udp_fd, w->logtail_buf, n, 0, + (struct sockaddr *) &imcf->logtail_udp_addr, + sizeof(struct sockaddr_in)); + } + + w->logtail_pos = w->logtail_buf; +} + + +static void +ngx_http_ipng_stats_logtail_flush_handler(ngx_event_t *ev) +{ + ngx_http_ipng_stats_worker_t *w = ev->data; + ngx_http_ipng_stats_main_conf_t *imcf; + + imcf = ngx_http_cycle_get_module_main_conf(ngx_cycle, + ngx_http_ipng_stats_module); + if (imcf != NULL) { + ngx_http_ipng_stats_logtail_flush(w, imcf); + } + + if (!ngx_exiting && !ngx_quit) { + ngx_add_timer(&w->logtail_flush_ev, + imcf ? 
imcf->logtail_flush : 1000); + } +} + + +/* ----------------------------------------------------------------- */ +/* Nginx variables: $ipng_source_tag */ +/* ----------------------------------------------------------------- */ + +static ngx_int_t +ngx_http_ipng_stats_source_variable(ngx_http_request_t *r, + ngx_http_variable_value_t *v, uintptr_t data) +{ + ngx_http_ipng_stats_main_conf_t *imcf; + ngx_str_t source; + + (void) data; + + imcf = ngx_http_get_module_main_conf(r, ngx_http_ipng_stats_module); + if (imcf == NULL) { + v->not_found = 1; + return NGX_OK; + } + + if (ngx_http_ipng_stats_resolve_source(r, imcf, &source) != NGX_OK) { + v->not_found = 1; + return NGX_OK; + } + + v->len = source.len; + v->valid = 1; + v->no_cacheable = 0; + v->not_found = 0; + v->data = source.data; + return NGX_OK; +} + + +/* ----------------------------------------------------------------- */ +/* Scrape content handler */ +/* ----------------------------------------------------------------- */ + +static ngx_int_t +ngx_http_ipng_stats_parse_filters(ngx_http_request_t *r, ngx_str_t *src, + ngx_str_t *vip) +{ + ngx_str_t key, val; + u_char *p, *last; + + ngx_str_null(src); + ngx_str_null(vip); + + if (r->args.len == 0) { + return NGX_OK; + } + + p = r->args.data; + last = r->args.data + r->args.len; + + while (p < last) { + key.data = p; + while (p < last && *p != '=' && *p != '&') p++; + key.len = p - key.data; + + if (p < last && *p == '=') { + p++; + val.data = p; + while (p < last && *p != '&') p++; + val.len = p - val.data; + } else { + ngx_str_null(&val); + } + if (p < last && *p == '&') p++; + + if (key.len == 10 && ngx_strncmp(key.data, "source_tag", 10) == 0) { + *src = val; + } else if (key.len == 3 && ngx_strncmp(key.data, "vip", 3) == 0) { + *vip = val; + } + } + return NGX_OK; +} + + +static ngx_int_t +ngx_http_ipng_stats_want_json(ngx_http_request_t *r) +{ + ngx_table_elt_t *h; + ngx_list_part_t *part; + ngx_uint_t i; + + part = &r->headers_in.headers.part; + h = part->elts; + for (i = 0; /* void */; i++) { + if (i >= part->nelts) { + if (part->next == NULL) break; + part = part->next; + h = part->elts; + i = 0; + } + if (h[i].key.len == 6 + && ngx_strncasecmp(h[i].key.data, (u_char *) "accept", 6) == 0) + { + if (ngx_strlcasestrn(h[i].value.data, + h[i].value.data + h[i].value.len, + (u_char *) "application/json", 16 - 1) + != NULL) + { + return 1; + } + return 0; + } + } + return 0; +} + + +static ngx_int_t +ngx_http_ipng_stats_content_handler(ngx_http_request_t *r) +{ + ngx_http_ipng_stats_main_conf_t *imcf; + ngx_str_t filter_src, filter_vip; + ngx_int_t rc; + + if (r->method != NGX_HTTP_GET && r->method != NGX_HTTP_HEAD) { + return NGX_HTTP_NOT_ALLOWED; + } + + rc = ngx_http_discard_request_body(r); + if (rc != NGX_OK) { + return rc; + } + + imcf = ngx_http_get_module_main_conf(r, ngx_http_ipng_stats_module); + if (imcf == NULL || imcf->shm_zone == NULL) { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + ngx_http_ipng_stats_parse_filters(r, &filter_src, &filter_vip); + + if (ngx_http_ipng_stats_want_json(r)) { + return ngx_http_ipng_stats_render_json(r, imcf, + &filter_src, &filter_vip); + } + return ngx_http_ipng_stats_render_prom(r, imcf, + &filter_src, &filter_vip); +} + + +/* Walk the shared-zone rbtree and invoke `emit` on each matching node. + * Caller holds the slab mutex. `emitted` is incremented for each node + * the emitter actually writes; renderers (e.g. JSON) use it to decide + * whether to prepend a separator. 
*/ +typedef ngx_int_t (*ngx_http_ipng_stats_emit_pt)(ngx_http_request_t *r, + ngx_http_ipng_stats_main_conf_t *imcf, + ngx_http_ipng_stats_shctx_t *sh, + ngx_http_ipng_stats_node_t *n, + ngx_chain_t **cl, ngx_chain_t ***cl_last, ngx_uint_t *emitted); + + +static ngx_int_t +ngx_http_ipng_stats_walk(ngx_http_request_t *r, + ngx_http_ipng_stats_main_conf_t *imcf, + ngx_str_t *filter_src, ngx_str_t *filter_vip, + ngx_http_ipng_stats_emit_pt emit, + ngx_chain_t **cl, ngx_chain_t ***cl_last) +{ + ngx_http_ipng_stats_shctx_t *sh = imcf->shm_zone->data; + ngx_slab_pool_t *slab; + ngx_queue_t *q; + ngx_http_ipng_stats_node_t *n; + ngx_str_t *src_entry, *vip_entry; + ngx_int_t rc = NGX_OK; + ngx_uint_t emitted = 0; + + slab = (ngx_slab_pool_t *) imcf->shm_zone->shm.addr; + ngx_shmtx_lock(&slab->mutex); + + for (q = ngx_queue_head(&sh->lru); + q != ngx_queue_sentinel(&sh->lru); + q = ngx_queue_next(q)) + { + n = ngx_queue_data(q, ngx_http_ipng_stats_node_t, lru); + + if (n->source_id >= sh->sources.nelts + || n->vip_id >= sh->vips.nelts) + { + continue; + } + src_entry = &sh->sources.entries[n->source_id]; + vip_entry = &sh->vips.entries[n->vip_id]; + + if (filter_src->len > 0 + && (src_entry->len != filter_src->len + || ngx_memcmp(src_entry->data, filter_src->data, + filter_src->len) != 0)) + { + continue; + } + if (filter_vip->len > 0 + && (vip_entry->len != filter_vip->len + || ngx_memcmp(vip_entry->data, filter_vip->data, + filter_vip->len) != 0)) + { + continue; + } + + rc = emit(r, imcf, sh, n, cl, cl_last, &emitted); + if (rc != NGX_OK) break; + } + + ngx_shmtx_unlock(&slab->mutex); + return rc; +} + + +static ngx_chain_t * +ngx_http_ipng_stats_chain_buf(ngx_http_request_t *r, size_t size) +{ + ngx_buf_t *b; + ngx_chain_t *cl; + + b = ngx_create_temp_buf(r->pool, size); + if (b == NULL) return NULL; + cl = ngx_alloc_chain_link(r->pool); + if (cl == NULL) return NULL; + cl->buf = b; + cl->next = NULL; + return cl; +} + + +static ngx_int_t +ngx_http_ipng_stats_emit_prom(ngx_http_request_t *r, + ngx_http_ipng_stats_main_conf_t *imcf, + ngx_http_ipng_stats_shctx_t *sh, + ngx_http_ipng_stats_node_t *n, + ngx_chain_t **cl_head, ngx_chain_t ***cl_tail, ngx_uint_t *emitted) +{ + ngx_chain_t *cl; + ngx_buf_t *b; + ngx_str_t *src = &sh->sources.entries[n->source_id]; + ngx_str_t *vip = &sh->vips.entries[n->vip_id]; + ngx_uint_t i; + + (void) cl_head; + + /* Reserve enough space for one rendered key: the worst-case single + * Prometheus line with a long label set plus the histogram tail. */ + cl = ngx_http_ipng_stats_chain_buf(r, 2048 + 64 * imcf->nbuckets); + if (cl == NULL) return NGX_ERROR; + b = cl->buf; + + b->last = ngx_sprintf(b->last, + "nginx_ipng_requests_total{source_tag=\"%V\",vip=\"%V\",code=\"%ui\"} %uA\n" + "nginx_ipng_bytes_in_total{source_tag=\"%V\",vip=\"%V\",code=\"%ui\"} %uA\n" + "nginx_ipng_bytes_out_total{source_tag=\"%V\",vip=\"%V\",code=\"%ui\"} %uA\n", + src, vip, n->code, n->requests, + src, vip, n->code, n->bytes_in, + src, vip, n->code, n->bytes_out); + + /* Histogram is per (source, vip); emit only for a canonical code + * slot (0 or the first seen) to avoid N-fold duplication. Keep it + * simple in v0.1: emit once per node, not once per (source, vip). + * Operators who need true histogram aggregation by (source, vip) + * should sum over the code dimension in PromQL. 
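+     *
+     * In this version the bucket series therefore carry the code label,
+     * one set per (source, vip, code) node.  Illustrative PromQL for the
+     * aggregated per-(source, vip) view:
+     *
+     *   sum by (source_tag, vip, le)
+     *     (nginx_ipng_request_duration_seconds_bucket)
+     *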
*/ + ngx_atomic_uint_t *lanes = (ngx_atomic_uint_t *) (n + 1); + ngx_uint_t cumulative = 0; + + for (i = 0; i < imcf->nbuckets; i++) { + cumulative += lanes[i]; + b->last = ngx_sprintf(b->last, + "nginx_ipng_request_duration_seconds_bucket" + "{source_tag=\"%V\",vip=\"%V\",code=\"%ui\",le=\"%.3f\"} %ui\n", + src, vip, n->code, + (double) imcf->bucket_bounds_ms[i] / 1000.0, + cumulative); + } + cumulative += lanes[imcf->nbuckets]; + b->last = ngx_sprintf(b->last, + "nginx_ipng_request_duration_seconds_bucket" + "{source_tag=\"%V\",vip=\"%V\",code=\"%ui\",le=\"+Inf\"} %ui\n" + "nginx_ipng_request_duration_seconds_sum" + "{source_tag=\"%V\",vip=\"%V\",code=\"%ui\"} %.3f\n" + "nginx_ipng_request_duration_seconds_count" + "{source_tag=\"%V\",vip=\"%V\",code=\"%ui\"} %ui\n", + src, vip, n->code, cumulative, + src, vip, n->code, (double) n->duration_sum_ms / 1000.0, + src, vip, n->code, cumulative); + + b->last_buf = 0; + b->last_in_chain = 0; + + **cl_tail = cl; + *cl_tail = &cl->next; + (*emitted)++; + return NGX_OK; +} + + +static ngx_int_t +ngx_http_ipng_stats_emit_json(ngx_http_request_t *r, + ngx_http_ipng_stats_main_conf_t *imcf, + ngx_http_ipng_stats_shctx_t *sh, + ngx_http_ipng_stats_node_t *n, + ngx_chain_t **cl_head, ngx_chain_t ***cl_tail, ngx_uint_t *emitted) +{ + /* v0.1 JSON output is a flat array of records, one per + * (source, vip, code) key, each carrying the counters and the + * request-duration histogram. The top-level `schema` field is + * versioned; consumers MUST check it before parsing. */ + ngx_chain_t *cl; + ngx_buf_t *b; + ngx_str_t *src = &sh->sources.entries[n->source_id]; + ngx_str_t *vip = &sh->vips.entries[n->vip_id]; + ngx_atomic_uint_t *lanes; + ngx_uint_t i; + size_t buf_size; + + (void) cl_head; + + /* Upper-bound: record header (~256B) + one bucket entry (~32B each) + + * closing braces. Round up generously. */ + buf_size = 512 + 48 * (imcf->nbuckets + 1); + + cl = ngx_http_ipng_stats_chain_buf(r, buf_size); + if (cl == NULL) return NGX_ERROR; + b = cl->buf; + + b->last = ngx_sprintf(b->last, + "%s{\"source_tag\":\"%V\",\"vip\":\"%V\",\"code\":%ui," + "\"requests\":%uA,\"bytes_in\":%uA,\"bytes_out\":%uA," + "\"request_duration_ms\":{" + "\"sum\":%uA,\"count\":%uA,\"buckets\":{", + (*emitted == 0) ? "" : ",", + src, vip, n->code, + n->requests, n->bytes_in, n->bytes_out, + n->duration_sum_ms, n->requests); + + lanes = (ngx_atomic_uint_t *) (n + 1); + for (i = 0; i < imcf->nbuckets; i++) { + b->last = ngx_sprintf(b->last, "%s\"%ui\":%uA", + (i == 0) ? "" : ",", + imcf->bucket_bounds_ms[i], lanes[i]); + } + b->last = ngx_sprintf(b->last, ",\"+Inf\":%uA}}}", + lanes[imcf->nbuckets]); + + **cl_tail = cl; + *cl_tail = &cl->next; + (*emitted)++; + return NGX_OK; +} + + +static ngx_int_t +ngx_http_ipng_stats_send(ngx_http_request_t *r, ngx_str_t *ctype, + ngx_chain_t *out) +{ + ngx_int_t rc; + + r->headers_out.status = NGX_HTTP_OK; + r->headers_out.content_type = *ctype; + r->headers_out.content_type_len = ctype->len; + r->headers_out.content_length_n = -1; /* chunked */ + + rc = ngx_http_send_header(r); + if (rc == NGX_ERROR || rc > NGX_OK || r->header_only) { + return rc; + } + if (out == NULL) { + return ngx_http_send_special(r, NGX_HTTP_LAST); + } + /* Mark final chain link. 
*/ + ngx_chain_t *last = out; + while (last->next) last = last->next; + last->buf->last_buf = 1; + last->buf->last_in_chain = 1; + + return ngx_http_output_filter(r, out); +} + + +static ngx_int_t +ngx_http_ipng_stats_render_prom(ngx_http_request_t *r, + ngx_http_ipng_stats_main_conf_t *imcf, + ngx_str_t *filter_source, ngx_str_t *filter_vip) +{ + ngx_chain_t *out = NULL; + ngx_chain_t **last = &out; + ngx_str_t ctype = ngx_string("text/plain; version=0.0.4"); + ngx_chain_t *hdr_cl; + ngx_buf_t *hdr_b; + + hdr_cl = ngx_http_ipng_stats_chain_buf(r, 512); + if (hdr_cl == NULL) return NGX_HTTP_INTERNAL_SERVER_ERROR; + hdr_b = hdr_cl->buf; + hdr_b->last = ngx_sprintf(hdr_b->last, + "# nginx-ipng-stats-plugin %s (schema=%d)\n" + "# HELP nginx_ipng_requests_total Total HTTP requests.\n" + "# TYPE nginx_ipng_requests_total counter\n" + "# HELP nginx_ipng_bytes_in_total Request bytes received.\n" + "# TYPE nginx_ipng_bytes_in_total counter\n" + "# HELP nginx_ipng_bytes_out_total Response bytes sent.\n" + "# TYPE nginx_ipng_bytes_out_total counter\n" + "# HELP nginx_ipng_request_duration_seconds Request duration.\n" + "# TYPE nginx_ipng_request_duration_seconds histogram\n", + NGX_HTTP_IPNG_STATS_VERSION, NGX_HTTP_IPNG_STATS_SCHEMA_VERSION); + *last = hdr_cl; + last = &hdr_cl->next; + + if (ngx_http_ipng_stats_walk(r, imcf, filter_source, filter_vip, + ngx_http_ipng_stats_emit_prom, + &out, &last) != NGX_OK) + { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + return ngx_http_ipng_stats_send(r, &ctype, out); +} + + +static ngx_int_t +ngx_http_ipng_stats_render_json(ngx_http_request_t *r, + ngx_http_ipng_stats_main_conf_t *imcf, + ngx_str_t *filter_source, ngx_str_t *filter_vip) +{ + ngx_chain_t *out = NULL; + ngx_chain_t **last = &out; + ngx_chain_t *hdr_cl, *tail_cl; + ngx_buf_t *hdr_b, *tail_b; + ngx_str_t ctype = ngx_string("application/json"); + + hdr_cl = ngx_http_ipng_stats_chain_buf(r, 64); + if (hdr_cl == NULL) return NGX_HTTP_INTERNAL_SERVER_ERROR; + hdr_b = hdr_cl->buf; + hdr_b->last = ngx_sprintf(hdr_b->last, + "{\"schema\":%d,\"records\":[", + NGX_HTTP_IPNG_STATS_SCHEMA_VERSION); + *last = hdr_cl; + last = &hdr_cl->next; + + if (ngx_http_ipng_stats_walk(r, imcf, filter_source, filter_vip, + ngx_http_ipng_stats_emit_json, + &out, &last) != NGX_OK) + { + return NGX_HTTP_INTERNAL_SERVER_ERROR; + } + + tail_cl = ngx_http_ipng_stats_chain_buf(r, 8); + if (tail_cl == NULL) return NGX_HTTP_INTERNAL_SERVER_ERROR; + tail_b = tail_cl->buf; + tail_b->last = ngx_sprintf(tail_b->last, "]}\n"); + *last = tail_cl; + last = &tail_cl->next; + + return ngx_http_ipng_stats_send(r, &ctype, out); +} diff --git a/tests/01-module/01-e2e.robot b/tests/01-module/01-e2e.robot new file mode 100644 index 0000000..abab3b8 --- /dev/null +++ b/tests/01-module/01-e2e.robot @@ -0,0 +1,280 @@ +# SPDX-License-Identifier: Apache-2.0 +*** Settings *** +Documentation End-to-end tests for ngx_http_ipng_stats_module. +... Deploys a 3-node containerlab topology and validates +... attribution, counters, histograms, filters, variables, +... and reload semantics. 
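+...               Assumes docker and containerlab are available on the host;
+...               keywords shell out to curl and "docker exec" to drive traffic.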
+Library OperatingSystem +Library String +Suite Setup Deploy Lab +Suite Teardown Cleanup Lab + +*** Variables *** +${lab-name} ipng-stats-test +${lab-file} lab/ipng-stats.clab.yml +${runtime} docker +${CLAB_BIN} sudo containerlab +${SERVER} clab-${lab-name}-server +${CLIENT1} clab-${lab-name}-client1 +${CLIENT2} clab-${lab-name}-client2 +${SCRAPE_URL} http://172.20.40.2:9113/.well-known/ipng/statsz +${SERVER_MGMT} http://172.20.40.2:8080 + +*** Test Cases *** + +# --- Basic functionality --- + +Module loads + [Documentation] nginx -t passes with the module loaded. + ${output} = Docker Exec ${SERVER} nginx -t 2>&1 + Should Contain ${output} syntax is ok + +Prometheus scrape + [Documentation] Scrape returns HELP/TYPE preamble. + ${output} = Scrape Prometheus + Should Contain ${output} nginx-ipng-stats-plugin + Should Contain ${output} nginx_ipng_requests_total + +JSON scrape + [Documentation] Accept: application/json returns valid JSON with schema. + ${rc} ${output} = Run And Return Rc And Output + ... curl -sf -H 'Accept: application/json' ${SCRAPE_URL} | python3 -m json.tool + Should Be Equal As Integers ${rc} 0 + Should Contain ${output} "schema": 1 + +# --- Per-device attribution --- + +Attribute cl1 via eth1 + [Documentation] Traffic on server:eth1 carries source_tag=cl1, vip=10.0.1.1. + Send Fast Requests ${CLIENT1} 10.0.1.1 5 + Wait For Flush + ${output} = Scrape Prometheus + Should Contain ${output} source_tag="cl1" + Should Contain ${output} vip="10.0.1.1" + +Attribute cl2 via eth2 + [Documentation] Traffic on server:eth2 carries source_tag=cl2, vip=10.0.2.1. + Send Fast Requests ${CLIENT2} 10.0.2.1 5 + Wait For Flush + ${output} = Scrape Prometheus + Should Contain ${output} source_tag="cl2" + Should Contain ${output} vip="10.0.2.1" + +Direct traffic tagged + [Documentation] Mgmt-interface traffic carries source_tag=direct. + ${rc} ${output} = Run And Return Rc And Output + ... curl -sf ${SERVER_MGMT}/ + Should Be Equal As Integers ${rc} 0 + Wait For Flush + ${output} = Scrape Prometheus + Should Contain ${output} source_tag="direct" + +# --- Status code tracking --- + +Per-code counters + [Documentation] 404 and 200 appear as distinct code= labels. + Docker Exec Ignore Rc ${CLIENT1} curl -s http://10.0.1.1:8080/notfound + Docker Exec Ignore Rc ${CLIENT1} curl -s http://10.0.1.1:8080/notfound + Wait For Flush + ${output} = Scrape With Filter source_tag=cl1 + Should Contain ${output} code="404" + Should Contain ${output} code="200" + +# --- Duration histogram --- + +Duration histogram + [Documentation] proxy_pass to a 50 ms backend populates sum and buckets. + Send Slow Requests ${CLIENT1} 10.0.1.1 3 + Wait For Flush + ${prom} = Scrape With Filter source_tag=cl1 + Should Match Regexp ${prom} request_duration_seconds_sum\\{[^}]*\\}\\s+\\d+\\.\\d*[1-9] + + ${rc} ${json} = Run And Return Rc And Output + ... curl -sf -H 'Accept: application/json' '${SCRAPE_URL}?source_tag=cl1' | python3 -m json.tool + Should Be Equal As Integers ${rc} 0 + Should Contain ${json} request_duration_ms + Should Contain ${json} buckets + +# --- Scrape filters --- + +Filter by source_tag + [Documentation] ?source_tag=cl1 returns cl1 only; cl2 only. + ${output} = Scrape With Filter source_tag=cl1 + Should Contain ${output} source_tag="cl1" + Should Not Contain ${output} source_tag="cl2" + + ${output} = Scrape With Filter source_tag=cl2 + Should Contain ${output} source_tag="cl2" + Should Not Contain ${output} source_tag="cl1" + +Filter by VIP + [Documentation] ?vip=10.0.1.1 excludes 10.0.2.1. 
+ ${output} = Scrape With Filter vip=10.0.1.1 + Should Contain ${output} vip="10.0.1.1" + Should Not Contain ${output} vip="10.0.2.1" + +Filter combined + [Documentation] source_tag + vip intersection. + ${output} = Scrape With Filter source_tag=cl1&vip=10.0.1.1 + Should Contain ${output} source_tag="cl1" + Should Contain ${output} vip="10.0.1.1" + Should Not Contain ${output} source_tag="cl2" + +Filter unknown tag + [Documentation] Unknown source_tag returns empty data set. + ${output} = Scrape With Filter source_tag=nonexistent + Should Not Contain ${output} nginx_ipng_requests_total{ + +# --- nginx variable --- + +Variable in access log + [Documentation] $ipng_source_tag appears as cl1, cl2, direct in log. + ${output} = Docker Exec ${SERVER} cat /var/log/nginx/access.log + Should Match Regexp ${output} src=cl1 + Should Match Regexp ${output} src=cl2 + Should Match Regexp ${output} src=direct + +UDP logtail + [Documentation] ipng_stats_logtail udp:// sends log lines to a local + ... nc listener; captured file has all sources and VIPs. + ${output} = Docker Exec ${SERVER} cat /var/log/nginx/logtail-udp.log + Should Match Regexp ${output} cl1 + Should Match Regexp ${output} cl2 + Should Match Regexp ${output} direct + Should Match Regexp ${output} 10\\.0\\.1\\.1 + Should Match Regexp ${output} 10\\.0\\.2\\.1 + # Tab-separated format + Should Match Regexp ${output} \\t + +VIP in access log + [Documentation] $server_addr resolves to real IPs, not 0.0.0.0. + ${output} = Docker Exec ${SERVER} cat /var/log/nginx/access.log + Should Contain ${output} vip=10.0.1.1 + Should Contain ${output} vip=10.0.2.1 + Should Not Contain ${output} vip=0.0.0.0 + +# --- Reload resilience --- + +Counters survive reload + [Documentation] Shared-memory zone persists across nginx -s reload. + ${before} = Get Request Count cl1 + Docker Exec ${SERVER} nginx -s reload + Sleep 2s Wait for new workers + ${after} = Get Request Count cl1 + Should Be True ${after} >= ${before} + ... Counters dropped after reload: before=${before} after=${after} + +Traffic after reload + [Documentation] New requests are counted after reload. + Send Fast Requests ${CLIENT1} 10.0.1.1 3 + Wait For Flush + ${output} = Scrape With Filter source_tag=cl1 + Should Contain ${output} source_tag="cl1" + +# --- Counter correctness --- + +Request count accuracy + [Documentation] 10 requests per client yields exactly 10 delta. + ${before_cl1} = Get Request Count cl1 + ${before_cl2} = Get Request Count cl2 + Send Fast Requests ${CLIENT1} 10.0.1.1 10 + Send Fast Requests ${CLIENT2} 10.0.2.1 10 + Wait For Flush + ${after_cl1} = Get Request Count cl1 + ${after_cl2} = Get Request Count cl2 + ${delta_cl1} = Evaluate ${after_cl1} - ${before_cl1} + ${delta_cl2} = Evaluate ${after_cl2} - ${before_cl2} + Should Be Equal As Integers ${delta_cl1} 10 + Should Be Equal As Integers ${delta_cl2} 10 + +*** Keywords *** + +# --- Lab lifecycle --- + +Deploy Lab + Run ${CLAB_BIN} --runtime ${runtime} destroy -t ${CURDIR}/${lab-file} --cleanup 2>&1 || true + ${rc} ${output} = Run And Return Rc And Output + ... ${CLAB_BIN} --runtime ${runtime} deploy -t ${CURDIR}/${lab-file} + Log ${output} + Should Be Equal As Integers ${rc} 0 + Wait Until Keyword Succeeds 90s 3s Server Is Ready + Wait Until Keyword Succeeds 60s 3s Client Can Reach Server ${CLIENT1} 10.0.1.1 + Wait Until Keyword Succeeds 60s 3s Client Can Reach Server ${CLIENT2} 10.0.2.1 + +Server Is Ready + ${rc} ${output} = Run And Return Rc And Output + ... 
curl -sf ${SCRAPE_URL} + Should Be Equal As Integers ${rc} 0 + +Client Can Reach Server + [Arguments] ${client} ${server_ip} + ${rc} ${output} = Run And Return Rc And Output + ... docker exec ${client} curl -sf http://${server_ip}:8080/ + Should Be Equal As Integers ${rc} 0 + +Cleanup Lab + Run docker logs ${SERVER} > ${EXECDIR}/tests/out/server-docker.log 2>&1 + Run docker exec ${SERVER} cat /var/log/nginx/access.log > ${EXECDIR}/tests/out/server-access.log 2>&1 + Run docker exec ${SERVER} cat /var/log/nginx/error.log > ${EXECDIR}/tests/out/server-error.log 2>&1 + Run docker exec ${SERVER} cat /var/log/nginx/logtail-udp.log > ${EXECDIR}/tests/out/server-logtail-udp.log 2>&1 + Run docker exec ${SERVER} ip addr > ${EXECDIR}/tests/out/server-ip-addr.log 2>&1 + Run docker exec ${SERVER} ip route > ${EXECDIR}/tests/out/server-ip-route.log 2>&1 + Run ${CLAB_BIN} --runtime ${runtime} destroy -t ${CURDIR}/${lab-file} --cleanup + +# --- Traffic generation --- + +Send Fast Requests + [Arguments] ${client} ${server_ip} ${count} + FOR ${i} IN RANGE ${count} + Docker Exec ${client} curl -sf http://${server_ip}:8080/ + END + +Send Slow Requests + [Arguments] ${client} ${server_ip} ${count} + FOR ${i} IN RANGE ${count} + Docker Exec ${client} curl -sf http://${server_ip}:8080/slow + END + +Wait For Flush + Sleep 2s + +# --- Scraping --- + +Scrape Prometheus + ${rc} ${output} = Run And Return Rc And Output + ... curl -sf ${SCRAPE_URL} + Should Be Equal As Integers ${rc} 0 + RETURN ${output} + +Scrape With Filter + [Arguments] ${filter} + ${rc} ${output} = Run And Return Rc And Output + ... curl -sf '${SCRAPE_URL}?${filter}' + Should Be Equal As Integers ${rc} 0 + RETURN ${output} + +Get Request Count + [Arguments] ${source} + ${output} = Scrape With Filter source_tag=${source} + ${matches} = Get Regexp Matches ${output} + ... nginx_ipng_requests_total\\{[^}]*\\}\\s+(\\d+) 1 + ${total} = Set Variable 0 + FOR ${m} IN @{matches} + ${total} = Evaluate ${total} + ${m} + END + RETURN ${total} + +# --- Container helpers --- + +Docker Exec + [Arguments] ${container} ${cmd} + ${rc} ${output} = Run And Return Rc And Output + ... docker exec ${container} ${cmd} + Should Be Equal As Integers ${rc} 0 + RETURN ${output} + +Docker Exec Ignore Rc + [Arguments] ${container} ${cmd} + ${rc} ${output} = Run And Return Rc And Output + ... docker exec ${container} ${cmd} + RETURN ${output} diff --git a/tests/01-module/lab/client/start.sh b/tests/01-module/lab/client/start.sh new file mode 100644 index 0000000..7b1a4a3 --- /dev/null +++ b/tests/01-module/lab/client/start.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# SPDX-License-Identifier: Apache-2.0 +# Client container entrypoint: installs curl, waits for containerlab +# to attach the data-plane veth, configures the IP, removes the mgmt +# default route so traffic to the server goes through eth1 (data-plane), +# and stays alive for docker-exec commands from the Robot test. + +apt-get update -qq +apt-get install -y -qq curl iproute2 > /dev/null 2>&1 + +# Wait for containerlab to attach eth1. +echo "Waiting for eth1 ..." +while ! ip link show eth1 > /dev/null 2>&1; do + sleep 0.2 +done +ip link set eth1 up +ip addr add ${MY_IP} dev eth1 + +# Remove the default route so packets to 10.0.x.0/24 go out eth1 +# (the connected route) instead of through the mgmt bridge. 
+ip route del default 2>/dev/null || true + +exec sleep infinity diff --git a/tests/01-module/lab/ipng-stats.clab.yml b/tests/01-module/lab/ipng-stats.clab.yml new file mode 100644 index 0000000..bb0d5bf --- /dev/null +++ b/tests/01-module/lab/ipng-stats.clab.yml @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: Apache-2.0 +# Containerlab topology for nginx-ipng-stats-plugin end-to-end tests. +# +# Three nodes: +# server — nginx with the module, a slow Python backend, two data-plane interfaces +# client1 — sends traffic via eth1 (attributed to source_tag=cl1) +# client2 — sends traffic via eth2 (attributed to source_tag=cl2) +# +# Links: +# server:eth1 ←→ client1:eth1 (10.0.1.0/24) +# server:eth2 ←→ client2:eth1 (10.0.2.0/24) + +name: ipng-stats-test + +mgmt: + network: ipng-stats-test-net + ipv4-subnet: 172.20.40.0/24 + +topology: + nodes: + server: + kind: linux + image: debian:trixie-slim + mgmt-ipv4: 172.20.40.2 + binds: + - ../../../build:/opt/build:ro + - ./server/nginx.conf:/opt/config/nginx.conf:ro + - ./server/slow-backend.py:/opt/config/slow-backend.py:ro + - ./server/start.sh:/start.sh:ro + cmd: bash /start.sh + + client1: + kind: linux + image: debian:trixie-slim + mgmt-ipv4: 172.20.40.11 + binds: + - ./client/start.sh:/start.sh:ro + cmd: bash /start.sh + env: + MY_IP: 10.0.1.2/24 + + client2: + kind: linux + image: debian:trixie-slim + mgmt-ipv4: 172.20.40.12 + binds: + - ./client/start.sh:/start.sh:ro + cmd: bash /start.sh + env: + MY_IP: 10.0.2.2/24 + + links: + - endpoints: ["server:eth1", "client1:eth1"] + - endpoints: ["server:eth2", "client2:eth1"] diff --git a/tests/01-module/lab/server/nginx.conf b/tests/01-module/lab/server/nginx.conf new file mode 100644 index 0000000..eb7c18d --- /dev/null +++ b/tests/01-module/lab/server/nginx.conf @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: Apache-2.0 +# Test nginx configuration for the ipng_stats module. + +load_module /usr/lib/nginx/modules/ngx_http_ipng_stats_module.so; + +error_log stderr notice; + +events { + worker_connections 128; +} + +http { + ipng_stats_zone ipng:1m; + ipng_stats_flush_interval 500ms; + ipng_stats_default_source direct; + + log_format tagged '$remote_addr src=$ipng_source_tag vip=$server_addr ' + '"$request" $status $body_bytes_sent'; + access_log /var/log/nginx/access.log tagged; + + # Global logtail — fires for ALL requests regardless of server block. + log_format logtail '$host\t$remote_addr\t$ipng_source_tag\t$server_addr\t' + '$request_method\t$request_uri\t$status\t$body_bytes_sent\t' + '$request_time'; + ipng_stats_logtail logtail udp://127.0.0.1:9514 buffer=4k flush=500ms; + + server { + # Mgmt-only listener for direct traffic (tagged "direct"). + listen 172.20.40.2:8080; + + # Per-interface listeners for attributed traffic. + listen 10.0.1.1:8080 device=eth1 ipng_source_tag=cl1; + listen 10.0.2.1:8080 device=eth2 ipng_source_tag=cl2; + + server_name _; + + location / { + return 200 "ok $server_addr\n"; + } + + location /notfound { + return 404 "nope\n"; + } + + location /slow { + proxy_pass http://127.0.0.1:29080/; + } + } + + server { + listen 172.20.40.2:9113; + + location = /.well-known/ipng/statsz { + ipng_stats; + allow all; + } + } +} diff --git a/tests/01-module/lab/server/slow-backend.py b/tests/01-module/lab/server/slow-backend.py new file mode 100644 index 0000000..1da3155 --- /dev/null +++ b/tests/01-module/lab/server/slow-backend.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: Apache-2.0 +# Minimal HTTP server that sleeps 50 ms before responding. 
+# Used by the test harness to produce measurable request durations. + +import http.server +import socketserver +import time + +class SlowHandler(http.server.BaseHTTPRequestHandler): + def do_GET(self): + time.sleep(0.05) + self.send_response(200) + self.send_header("Content-Type", "text/plain") + self.end_headers() + self.wfile.write(b"slow\n") + + def log_message(self, format, *args): + pass + +with socketserver.TCPServer(("127.0.0.1", 29080), SlowHandler) as srv: + srv.serve_forever() diff --git a/tests/01-module/lab/server/start.sh b/tests/01-module/lab/server/start.sh new file mode 100644 index 0000000..2126d14 --- /dev/null +++ b/tests/01-module/lab/server/start.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# SPDX-License-Identifier: Apache-2.0 +# Server container entrypoint: installs nginx + module, waits for +# containerlab to create data-plane interfaces, starts the slow +# Python backend, and runs nginx in the foreground. + +# Suppress automatic service start/restart during apt/dpkg. +printf '#!/bin/sh\nexit 101\n' > /usr/sbin/policy-rc.d +chmod +x /usr/sbin/policy-rc.d + +apt-get update -qq +apt-get install -y -qq nginx python3 procps iproute2 ncat > /dev/null 2>&1 + +# Install the module .deb built by `make pkg-deb`. +dpkg -i /opt/build/libnginx-mod-http-ipng-stats_*.deb 2>/dev/null || true + +# Re-enable module symlink in case postinst disabled it. +ln -sf /etc/nginx/modules-available/50-mod-http-ipng-stats.conf \ + /etc/nginx/modules-enabled/50-mod-http-ipng-stats.conf + +# Remove the policy block now that packages are installed. +rm -f /usr/sbin/policy-rc.d + +# Wait for containerlab to attach the data-plane veth pairs. +for iface in eth1 eth2; do + echo "Waiting for $iface ..." + while ! ip link show "$iface" > /dev/null 2>&1; do + sleep 0.2 + done + ip link set "$iface" up +done + +ip addr add 10.0.1.1/24 dev eth1 +ip addr add 10.0.2.1/24 dev eth2 + +# Slow backend: 50 ms sleep per request. +python3 /opt/config/slow-backend.py & + +# UDP logtail listener — captures datagrams to a file for test validation. +ncat -u -l -k 127.0.0.1 9514 --recv-only >> /var/log/nginx/logtail-udp.log & + +exec nginx -g 'daemon off;' -c /opt/config/nginx.conf diff --git a/tests/requirements.txt b/tests/requirements.txt new file mode 100644 index 0000000..b779ff3 --- /dev/null +++ b/tests/requirements.txt @@ -0,0 +1 @@ +robotframework diff --git a/tests/rf-run.sh b/tests/rf-run.sh new file mode 100755 index 0000000..f96925d --- /dev/null +++ b/tests/rf-run.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# SPDX-License-Identifier: Apache-2.0 +# Test runner for nginx-ipng-stats-plugin robot tests. +# Usage: ./rf-run.sh +# runtime: docker (default) +# test_path: path to .robot file or directory (default: all tests) + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +CLAB_BIN="${CLAB_BIN:-containerlab}" +RUNTIME="${1:-docker}" +TEST="${2:-${SCRIPT_DIR}}" + +mkdir -p "${SCRIPT_DIR}/out" + +# Create venv if needed +if [ ! 
-d "${SCRIPT_DIR}/.venv" ]; then + python3 -m venv "${SCRIPT_DIR}/.venv" + "${SCRIPT_DIR}/.venv/bin/pip" install -q -r "${SCRIPT_DIR}/requirements.txt" +fi + +source "${SCRIPT_DIR}/.venv/bin/activate" + +get_logname() { + local name + name="$(basename "$1" .robot)" + if [ -d "$1" ]; then + name="$(basename "$1")" + fi + echo "$name" +} + +robot --consolecolors on -r none \ + --variable CLAB_BIN:"${CLAB_BIN}" \ + --variable runtime:"${RUNTIME}" \ + -l "${SCRIPT_DIR}/out/$(get_logname "${TEST}")-${RUNTIME}-log" \ + --output "${SCRIPT_DIR}/out/$(get_logname "${TEST}")-${RUNTIME}-out.xml" \ + "${TEST}"