From df05bae8a3b8d8db604d94a2906960b604d8d906 Mon Sep 17 00:00:00 2001 From: Pim van Pelt Date: Sat, 18 Apr 2026 11:45:40 +0200 Subject: [PATCH] Support multiple device-pinned listens sharing a single port Nginx's config-level duplicate-listen check rejected the documented pattern of `listen 80 device=X ipng_source_tag=A; listen 80 device=Y ipng_source_tag=B;` with "a duplicate listen 0.0.0.0:80", and even when the dedup was bypassed the kernel refused the second bind() because the first socket was already holding the port without SO_BINDTODEVICE. The listen wrapper now detects same-sockaddr duplicates before the core handler sees them and records them with `needs_clone=1`. In init_module, phase 1 clones an ngx_listening_t for each such duplicate, phase 3 closes every inherited naked fd, and phase 4 rebinds every target with SO_REUSEADDR + SO_REUSEPORT + SO_BINDTODEVICE set before bind(). SO_REUSEPORT keeps `nginx -s reload` from colliding with the still-bound sockets held by old workers during graceful drain; IPV6_V6ONLY matches nginx's default so the IPv6 listen doesn't claim the IPv4 wildcard and collide with sibling IPv4-specific listens. Restructure 01-module to cover the pattern end-to-end: four device-pinned listens on port 8080 (eth1 shares tag `tag1` across v4 and v6; eth2 splits into `tag2-v4` / `tag2-v6`), clients and server both get IPv6 addresses, and a new "Per-(device, family) request count accuracy" case proves that 10 requests on each of the four combinations yields tag1=20, tag2-v4=10, tag2-v6=10. Mgmt/direct traffic moves to port 9180 so it no longer clashes with the shared-port wildcards. Document the constraint in docs/user-guide.md: all listens on a given port must carry `device=`, and direct traffic belongs on a separate port. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/user-guide.md | 46 +++- src/ngx_http_ipng_stats_module.c | 330 +++++++++++++++++------- tests/01-module/01-e2e.robot | 121 ++++++--- tests/01-module/lab/client/start.sh | 3 + tests/01-module/lab/ipng-stats.clab.yml | 13 +- tests/01-module/lab/server/nginx.conf | 38 ++- tests/01-module/lab/server/start.sh | 2 + 7 files changed, 400 insertions(+), 153 deletions(-) diff --git a/docs/user-guide.md b/docs/user-guide.md index 2638e5f..ce9b8d8 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -92,7 +92,7 @@ ip -6 -s link show gre-mg1 The plugin needs three things in `nginx.conf`: 1. A shared-memory zone for counters (`ipng_stats_zone`). -2. A set of `listen` directives — a wildcard fallback plus one device-bound listener per attributed interface. +2. One device-bound `listen` directive per attributed (interface, address family) pair. 3. A scrape location serving the `ipng_stats` handler. A minimal working configuration looks like this: @@ -109,17 +109,25 @@ http { ipng_stats_flush_interval 1s; ipng_stats_default_source direct; - # A normal vhost. The fallback listen lines serve direct web traffic; - # the included file adds one device-bound listen per attributed interface. + # Attributed vhost. Every listen on this port must be device-tagged — + # see "All listens on a shared port must be device-tagged" below. server { - listen 80; - listen [::]:80; include /etc/nginx/ipng-stats/listens.conf; server_name _; root /var/www/html; } + # Direct (un-attributed) traffic on a separate port — the listen has no + # device=, so requests get the `ipng_stats_default_source` tag. + server { + listen 198.51.100.1:8081 default_server; + listen [2001:db8::1]:8081 default_server; + + server_name _; + root /var/www/html; + } + # A second server block exposing the scrape endpoint on a locked-down port. server { listen 127.0.0.1:9113; @@ -165,12 +173,30 @@ You do not need to enumerate VIPs in `listen`. 
A wildcard `listen 80 device=gre-mg1 ipng_source_tag=mg1;` catches every VIP served through the `gre-mg1` interface, and nginx routes per-request to the right vhost by `server_name` / `Host:` header. Adding a new VIP is a `server_name` change; adding a new interface is an append to `listens.conf`. -### Why both a wildcard and device-bound listens? +### All listens on a shared port must be device-tagged -The fallback `listen 80;` / `listen [::]:80;` catches traffic arriving on any interface that isn't one of your attributed interfaces — -for example, real clients hitting your host directly over `eth0`. The kernel's TCP socket lookup prefers the most-specific -(device-matching) listener, so a SYN on `gre-mg1` always lands on the `mg1` socket, and a SYN on `eth0` always lands on the fallback. -No races, no stealing. Direct traffic is counted under the tag set by `ipng_stats_default_source` (`direct` by default). +If you use multiple `listen` directives on the same port (e.g. port 80), **every one of them must carry `device=`**. Mixing a +device-pinned listen with a plain `listen 80;` or with an address-specific `listen 192.0.2.1:80;` on the same port is **not +supported** and nginx will fail to start. This is a kernel-level limitation: a device-pinned socket sets `SO_BINDTODEVICE` before +`bind(2)`, while a plain wildcard socket sets no device filter — Linux refuses to hold both on the same `(addr, port)` tuple, so +the second bind fails with `EADDRINUSE` regardless of what the nginx config-level dedup might do. 
+ +### Sharing a single port across address families and devices + +Within the device-tagged set, you're free to share port numbers freely across devices and address families: as long as each listen +has a distinct `device=`, the kernel keeps them apart, and within one device you can either reuse a single tag or split by family. +For example: + +```nginx +listen 80 device=gre-mg1 ipng_source_tag=mg1; +listen [::]:80 device=gre-mg1 ipng_source_tag=mg1; # same tag across families +listen 80 device=gre-mg2 ipng_source_tag=mg2-v4; +listen [::]:80 device=gre-mg2 ipng_source_tag=mg2-v6; # per-family tags +``` ## 4. Verify with curl diff --git a/src/ngx_http_ipng_stats_module.c b/src/ngx_http_ipng_stats_module.c index c0e2ae6..6a0d7b3 100644 --- a/src/ngx_http_ipng_stats_module.c +++ b/src/ngx_http_ipng_stats_module.c @@ -182,13 +182,21 @@ typedef struct { /* Per-listen binding recorded by the listen wrapper at config parse - * time. Resolved to an ngx_listening_t* at init_module time. */ + * time. Resolved to an ngx_listening_t* at init_module time. + * + * `needs_clone` marks a listen directive that shares a sockaddr with + * an earlier binding. Nginx's core listen handler rejects such + * duplicates with "a duplicate listen ..."; our wrapper therefore + * skips the core call for these, and init_module manufactures an + * ngx_listening_t for each by cloning the template it shares the + * sockaddr with. */ typedef struct { ngx_str_t device; ngx_str_t source; ngx_sockaddr_t sockaddr; socklen_t socklen; ngx_listening_t *listening; /* filled at init_module */ + unsigned needs_clone:1; } ngx_http_ipng_stats_binding_t; @@ -495,47 +503,33 @@ ngx_http_ipng_stats_listen_wrapper(ngx_conf_t *cf, ngx_command_t *cmd, i++; } - if (device.len > 0 || source.len > 0) { - /* Force nginx to create a dedicated listening socket for this - * address even when a wildcard on the same port already exists. 
- * Without `bind`, nginx's optimizer eliminates specific-address - * sockets that are covered by a wildcard, which would prevent us - * from applying SO_BINDTODEVICE and tagging traffic per device. */ - ngx_str_t *bind_arg = ngx_array_push(cf->args); - if (bind_arg == NULL) { - return NGX_CONF_ERROR; - } - ngx_str_set(bind_arg, "bind"); - } - - rv = ngx_http_core_listen_orig(cf, cmd, conf); - if (rv != NGX_CONF_OK) { - return rv; - } - if (device.len == 0 && source.len == 0) { - return NGX_CONF_OK; + /* Plain listen with no module-specific parameters — let nginx + * handle it end-to-end. */ + return ngx_http_core_listen_orig(cf, cmd, conf); } - if (cf->args->nelts < 2) { - return NGX_CONF_OK; + /* Force nginx to bind a dedicated socket for this address rather + * than folding it into a wildcard. Without `bind`, nginx's listen + * optimizer discards specific-address entries covered by a + * wildcard, which prevents us from applying SO_BINDTODEVICE. */ + ngx_str_t *bind_arg = ngx_array_push(cf->args); + if (bind_arg == NULL) { + return NGX_CONF_ERROR; } + ngx_str_set(bind_arg, "bind"); - /* Listen options are not stored on the core srv conf in a way we - * can cheaply recover after the original handler runs (the core - * `listen` field is a 1-bit flag). Instead we reparse the address - * argument ourselves so we know which sockaddr to match against - * cycle->listening[] at init_module time. */ + /* Parse the listen address ourselves: we need the sockaddr to + * detect duplicates (multiple `listen 80 device=X` at the same + * addr) and to match bindings to cycle->listening[] entries in + * init_module. */ ngx_memzero(&u, sizeof(ngx_url_t)); u.url = value[1]; u.listen = 1; u.default_port = 80; if (ngx_parse_url(cf->pool, &u) != NGX_OK || u.naddrs == 0) { - /* The original handler already accepted this address, so a - * reparse failure would be surprising. Skip binding rather - * than fail the reload. 
*/ - return NGX_CONF_OK; + return ngx_http_core_listen_orig(cf, cmd, conf); } imcf = ngx_http_conf_get_module_main_conf(cf, ngx_http_ipng_stats_module); @@ -548,6 +542,31 @@ ngx_http_ipng_stats_listen_wrapper(ngx_conf_t *cf, ngx_command_t *cmd, } } + /* Call the core handler at most once per (addr, port). Any + * subsequent listen at the same sockaddr would hit nginx's + * duplicate-listen check — we skip it and let init_module clone + * the first-seen listening entry for each duplicate. */ + ngx_http_ipng_stats_binding_t *existing = imcf->bindings->elts; + ngx_uint_t dup = 0; + for (i = 0; i < imcf->bindings->nelts; i++) { + if (existing[i].socklen == u.addrs[0].socklen + && ngx_cmp_sockaddr((struct sockaddr *) &existing[i].sockaddr, + existing[i].socklen, + u.addrs[0].sockaddr, + u.addrs[0].socklen, 1) == NGX_OK) + { + dup = 1; + break; + } + } + + if (!dup) { + rv = ngx_http_core_listen_orig(cf, cmd, conf); + if (rv != NGX_CONF_OK) { + return rv; + } + } + /* Record one binding per resolved address (ngx_parse_url may yield * multiple for a hostname; listen specs use literal addresses so * naddrs is almost always 1). */ @@ -580,6 +599,7 @@ ngx_http_ipng_stats_listen_wrapper(ngx_conf_t *cf, ngx_command_t *cmd, b->socklen = u.addrs[i].socklen; ngx_memcpy(&b->sockaddr, u.addrs[i].sockaddr, u.addrs[i].socklen); + b->needs_clone = dup ? 
1 : 0; } return NGX_CONF_OK; @@ -1160,98 +1180,226 @@ ngx_http_ipng_stats_init_zone(ngx_shm_zone_t *shm_zone, void *data) /* ----------------------------------------------------------------- */ -/* init_module: apply SO_BINDTODEVICE to the opened listen sockets */ +/* init_module: rebind listen sockets with SO_BINDTODEVICE */ /* ----------------------------------------------------------------- */ +/* Create a device-pinned listening socket: socket + SO_REUSEADDR + + * SO_REUSEPORT + SO_BINDTODEVICE (set *before* bind, which is what + * lets the kernel permit multiple sockets on the same wildcard + * addr+port) + bind + listen + nonblocking. + * + * SO_REUSEPORT is required so that on `nginx -s reload` the new + * master's rebind doesn't collide with the still-bound sockets held + * by the old workers during their graceful-drain window. The kernel + * still uses SO_BINDTODEVICE to filter per device, so traffic + * attribution stays correct. + * + * Returns the new fd, or -1 on failure with errno preserved. 
*/ +static ngx_socket_t +ngx_http_ipng_stats_open_dev_socket(ngx_cycle_t *cycle, + struct sockaddr *sa, socklen_t salen, int backlog, ngx_str_t *device) +{ + ngx_socket_t s; + int one = 1; + char devname[IFNAMSIZ]; + size_t dlen; + + s = socket(sa->sa_family, SOCK_STREAM, 0); + if (s == (ngx_socket_t) -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "ipng_stats: socket() failed"); + return (ngx_socket_t) -1; + } + if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "ipng_stats: setsockopt(SO_REUSEADDR) failed"); + close(s); + return (ngx_socket_t) -1; + } + if (setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) == -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "ipng_stats: setsockopt(SO_REUSEPORT) failed"); + close(s); + return (ngx_socket_t) -1; + } +#if (NGX_HAVE_INET6) + if (sa->sa_family == AF_INET6) { + /* Match nginx's default (ipv6only=on): keep the [::]:X listen + * strictly IPv6. Without this, Linux's bindv6only=0 default + * makes the socket claim the IPv4 wildcard too, which collides + * with sibling IPv4-specific listens on the same port (e.g. a + * mgmt-address listener). */ + if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)) == -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "ipng_stats: setsockopt(IPV6_V6ONLY) failed"); + close(s); + return (ngx_socket_t) -1; + } + } +#endif + dlen = device->len < IFNAMSIZ - 1 ? 
device->len : IFNAMSIZ - 1; + ngx_memcpy(devname, device->data, dlen); + devname[dlen] = '\0'; + if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE, devname, + (socklen_t) (dlen + 1)) == -1) + { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "ipng_stats: setsockopt(SO_BINDTODEVICE, \"%s\") failed", + devname); + close(s); + return (ngx_socket_t) -1; + } + if (bind(s, sa, salen) == -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "ipng_stats: bind() failed for device \"%s\"", devname); + close(s); + return (ngx_socket_t) -1; + } + if (listen(s, backlog) == -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "ipng_stats: listen() failed for device \"%s\"", devname); + close(s); + return (ngx_socket_t) -1; + } + if (ngx_nonblocking(s) == -1) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, + "ipng_stats: fcntl(O_NONBLOCK) failed"); + close(s); + return (ngx_socket_t) -1; + } + return s; +} + + static ngx_int_t ngx_http_ipng_stats_init_module(ngx_cycle_t *cycle) { ngx_http_ipng_stats_main_conf_t *imcf; ngx_http_ipng_stats_binding_t *bindings; - ngx_listening_t *ls; + ngx_listening_t *ls, *tmpl, *dup; ngx_uint_t i, j; - char devname[IFNAMSIZ]; - size_t dlen; + ngx_uint_t *target_idx; + u_char *claimed; imcf = ngx_http_cycle_get_module_main_conf(cycle, ngx_http_ipng_stats_module); - if (imcf == NULL) { - ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, - "ipng_stats: init_module: imcf is NULL"); - return NGX_OK; - } - if (imcf->bindings == NULL) { - ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, - "ipng_stats: init_module: no bindings (no device= " - "or ipng_source_tag= on any listen)"); + if (imcf == NULL || imcf->bindings == NULL) { return NGX_OK; } bindings = imcf->bindings->elts; - ls = cycle->listening.elts; - ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, - "ipng_stats: init_module: %ui bindings, %ui listeners", - imcf->bindings->nelts, cycle->listening.nelts); - - for (i = 0; i < cycle->listening.nelts; i++) { - 
ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, - "ipng_stats: listener[%ui]: fd=%d addr=%V socklen=%d", - i, (int) ls[i].fd, &ls[i].addr_text, - (int) ls[i].socklen); + /* Phase 1: append a cloned ngx_listening_t for every duplicate + * binding. ngx_array_push may reallocate elts, so we store indices + * not pointers. `target_idx[j]` is the index into cycle->listening + * that binding j ends up owning. */ + target_idx = ngx_pcalloc(cycle->pool, + imcf->bindings->nelts * sizeof(ngx_uint_t)); + if (target_idx == NULL) { + return NGX_ERROR; } for (j = 0; j < imcf->bindings->nelts; j++) { - ngx_int_t matched = 0; + if (!bindings[j].needs_clone) continue; + ls = cycle->listening.elts; + tmpl = NULL; for (i = 0; i < cycle->listening.nelts; i++) { - if (ls[i].socklen != bindings[j].socklen) { - continue; + if (ls[i].socklen == bindings[j].socklen + && ngx_cmp_sockaddr(ls[i].sockaddr, ls[i].socklen, + (struct sockaddr *) &bindings[j].sockaddr, + bindings[j].socklen, 1) == NGX_OK) + { + tmpl = &ls[i]; + break; } + } + if (tmpl == NULL) { + ngx_log_error(NGX_LOG_EMERG, cycle->log, 0, + "ipng_stats: init_module: no template listening " + "found for cloned binding (source=\"%V\")", + &bindings[j].source); + return NGX_ERROR; + } + + dup = ngx_array_push(&cycle->listening); + if (dup == NULL) return NGX_ERROR; + *dup = *tmpl; + dup->fd = (ngx_socket_t) -1; + dup->previous = NULL; + + target_idx[j] = cycle->listening.nelts - 1; + } + + /* Phase 2: map every non-clone binding to an existing listening + * entry (by sockaddr, first unclaimed wins). 
*/ + claimed = ngx_pcalloc(cycle->pool, cycle->listening.nelts); + if (claimed == NULL) return NGX_ERROR; + + for (j = 0; j < imcf->bindings->nelts; j++) { + if (bindings[j].needs_clone) { + claimed[target_idx[j]] = 1; + continue; + } + ls = cycle->listening.elts; + for (i = 0; i < cycle->listening.nelts; i++) { + if (claimed[i]) continue; + if (ls[i].socklen != bindings[j].socklen) continue; if (ngx_cmp_sockaddr(ls[i].sockaddr, ls[i].socklen, (struct sockaddr *) &bindings[j].sockaddr, bindings[j].socklen, 1) != NGX_OK) { continue; } - matched = 1; - bindings[j].listening = &ls[i]; - - if (bindings[j].device.len > 0 && ls[i].fd != (ngx_socket_t) -1) { - dlen = bindings[j].device.len < IFNAMSIZ - 1 - ? bindings[j].device.len : IFNAMSIZ - 1; - ngx_memcpy(devname, bindings[j].device.data, dlen); - devname[dlen] = '\0'; - - if (setsockopt(ls[i].fd, SOL_SOCKET, SO_BINDTODEVICE, - devname, (socklen_t) (dlen + 1)) == -1) - { - ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno, - "ipng_stats: setsockopt(SO_BINDTODEVICE, " - "\"%s\") failed for listen fd %d", - devname, (int) ls[i].fd); - return NGX_ERROR; - } - ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, - "ipng_stats: bound listen fd %d to device " - "\"%s\" (source=\"%V\")", - (int) ls[i].fd, devname, &bindings[j].source); - } - break; + target_idx[j] = i; + claimed[i] = 1; + goto found; } - if (!matched) { - u_char buf[NGX_SOCKADDR_STRLEN]; - size_t len; - len = ngx_sock_ntop((struct sockaddr *) &bindings[j].sockaddr, - bindings[j].socklen, buf, sizeof(buf), 1); - ngx_log_error(NGX_LOG_WARN, cycle->log, 0, - "ipng_stats: no listener matched binding " - "source=\"%V\" addr=%*s socklen=%d", - &bindings[j].source, len, buf, - (int) bindings[j].socklen); + ngx_log_error(NGX_LOG_EMERG, cycle->log, 0, + "ipng_stats: init_module: no listening entry for " + "binding (source=\"%V\")", &bindings[j].source); + return NGX_ERROR; + found: + ; + } + + /* Phase 3: close every pre-bound fd on a target listening entry. 
+ * nginx ran ngx_open_listening_sockets before init_module, so the + * first-seen listen at each sockaddr has a naked bind that would + * block subsequent device-pinned binds on the same addr. Free the + * ports before we rebind. */ + ls = cycle->listening.elts; + for (j = 0; j < imcf->bindings->nelts; j++) { + ngx_listening_t *t = &ls[target_idx[j]]; + if (t->fd != (ngx_socket_t) -1) { + close(t->fd); + t->fd = (ngx_socket_t) -1; } } + /* Phase 4: rebind each target with SO_BINDTODEVICE set before + * bind(). `ls->inherited = 1` tells nginx not to touch the socket + * in any subsequent setup pass. */ + for (j = 0; j < imcf->bindings->nelts; j++) { + ngx_listening_t *t = &ls[target_idx[j]]; + ngx_socket_t s; + + s = ngx_http_ipng_stats_open_dev_socket(cycle, + t->sockaddr, t->socklen, t->backlog, &bindings[j].device); + if (s == (ngx_socket_t) -1) { + return NGX_ERROR; + } + t->fd = s; + t->inherited = 1; + bindings[j].listening = t; + + ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, + "ipng_stats: listen %V bound to device \"%V\" " + "(source=\"%V\", fd=%d)", + &t->addr_text, &bindings[j].device, + &bindings[j].source, (int) s); + } + return NGX_OK; } diff --git a/tests/01-module/01-e2e.robot b/tests/01-module/01-e2e.robot index f32ada4..3036f4b 100644 --- a/tests/01-module/01-e2e.robot +++ b/tests/01-module/01-e2e.robot @@ -18,7 +18,7 @@ ${SERVER} clab-${lab-name}-server ${CLIENT1} clab-${lab-name}-client1 ${CLIENT2} clab-${lab-name}-client2 ${SCRAPE_URL} http://172.20.40.2:9113/.well-known/ipng/statsz -${SERVER_MGMT} http://172.20.40.2:8080 +${SERVER_MGMT} http://172.20.40.2:9180 *** Test Cases *** @@ -44,22 +44,42 @@ JSON scrape # --- Per-device attribution --- -Attribute cl1 via eth1 - [Documentation] Traffic on server:eth1 carries source_tag=cl1, vip=10.0.1.1. +Attribute tag1 via eth1 (v4) + [Documentation] IPv4 traffic on server:eth1 carries source_tag=tag1. 
Send Fast Requests ${CLIENT1} 10.0.1.1 5 Wait For Flush ${output} = Scrape Prometheus - Should Contain ${output} source_tag="cl1" + Should Contain ${output} source_tag="tag1" Should Contain ${output} vip="10.0.1.1" -Attribute cl2 via eth2 - [Documentation] Traffic on server:eth2 carries source_tag=cl2, vip=10.0.2.1. +Attribute tag2-v4 via eth2 (v4) + [Documentation] IPv4 traffic on server:eth2 carries source_tag=tag2-v4. Send Fast Requests ${CLIENT2} 10.0.2.1 5 Wait For Flush ${output} = Scrape Prometheus - Should Contain ${output} source_tag="cl2" + Should Contain ${output} source_tag="tag2-v4" Should Contain ${output} vip="10.0.2.1" +Attribute tag1 via eth1 (v6) + [Documentation] IPv6 traffic on server:eth1 carries source_tag=tag1 + ... — same tag as v4, demonstrating that tag= can be + ... shared across address families for one device. + Send Fast Requests v6 ${CLIENT1} 2001:db8:1::1 5 + Wait For Flush + ${output} = Scrape With Filter source_tag=tag1 + Should Contain ${output} source_tag="tag1" + Should Contain ${output} vip="2001:db8:1::1" + +Attribute tag2-v6 via eth2 (v6) + [Documentation] IPv6 traffic on server:eth2 carries source_tag=tag2-v6 + ... — distinct from the eth2 v4 tag, demonstrating + ... per-(device, family) attribution. + Send Fast Requests v6 ${CLIENT2} 2001:db8:2::1 5 + Wait For Flush + ${output} = Scrape Prometheus + Should Contain ${output} source_tag="tag2-v6" + Should Contain ${output} vip="2001:db8:2::1" + Direct traffic tagged [Documentation] Mgmt-interface traffic carries source_tag=direct. 
${rc} ${output} = Run And Return Rc And Output @@ -76,7 +96,7 @@ Per-class code counters Docker Exec Ignore Rc ${CLIENT1} curl -s http://10.0.1.1:8080/notfound Docker Exec Ignore Rc ${CLIENT1} curl -s http://10.0.1.1:8080/notfound Wait For Flush - ${output} = Scrape With Filter source_tag=cl1 + ${output} = Scrape With Filter source_tag=tag1 Should Contain ${output} code="4xx" Should Contain ${output} code="2xx" @@ -86,11 +106,11 @@ Duration histogram [Documentation] proxy_pass to a 50 ms backend populates sum and buckets. Send Slow Requests ${CLIENT1} 10.0.1.1 3 Wait For Flush - ${prom} = Scrape With Filter source_tag=cl1 + ${prom} = Scrape With Filter source_tag=tag1 Should Match Regexp ${prom} request_duration_seconds_sum\\{[^}]*\\}\\s+\\d+\\.\\d*[1-9] ${rc} ${json} = Run And Return Rc And Output - ... curl -sf -H 'Accept: application/json' '${SCRAPE_URL}?source_tag=cl1' | python3 -m json.tool + ... curl -sf -H 'Accept: application/json' '${SCRAPE_URL}?source_tag=tag1' | python3 -m json.tool Should Be Equal As Integers ${rc} 0 Should Contain ${json} request_duration_ms Should Contain ${json} buckets @@ -98,14 +118,14 @@ Duration histogram # --- Scrape filters --- Filter by source_tag - [Documentation] ?source_tag=cl1 returns cl1 only; cl2 only. - ${output} = Scrape With Filter source_tag=cl1 - Should Contain ${output} source_tag="cl1" - Should Not Contain ${output} source_tag="cl2" + [Documentation] ?source_tag=tag1 returns tag1 only; tag2-v4 only. + ${output} = Scrape With Filter source_tag=tag1 + Should Contain ${output} source_tag="tag1" + Should Not Contain ${output} source_tag="tag2-v4" - ${output} = Scrape With Filter source_tag=cl2 - Should Contain ${output} source_tag="cl2" - Should Not Contain ${output} source_tag="cl1" + ${output} = Scrape With Filter source_tag=tag2-v4 + Should Contain ${output} source_tag="tag2-v4" + Should Not Contain ${output} source_tag="tag1" Filter by VIP [Documentation] ?vip=10.0.1.1 excludes 10.0.2.1. 
@@ -115,10 +135,10 @@ Filter by VIP Filter combined [Documentation] source_tag + vip intersection. - ${output} = Scrape With Filter source_tag=cl1&vip=10.0.1.1 - Should Contain ${output} source_tag="cl1" + ${output} = Scrape With Filter source_tag=tag1&vip=10.0.1.1 + Should Contain ${output} source_tag="tag1" Should Contain ${output} vip="10.0.1.1" - Should Not Contain ${output} source_tag="cl2" + Should Not Contain ${output} source_tag="tag2-v4" Filter unknown tag [Documentation] Unknown source_tag returns empty data set. @@ -128,18 +148,18 @@ Filter unknown tag # --- nginx variable --- Variable in access log - [Documentation] $ipng_source_tag appears as cl1, cl2, direct in log. + [Documentation] $ipng_source_tag appears as tag1, tag2-v4, direct in log. ${output} = Docker Exec ${SERVER} cat /var/log/nginx/access.log - Should Match Regexp ${output} src=cl1 - Should Match Regexp ${output} src=cl2 + Should Match Regexp ${output} src=tag1 + Should Match Regexp ${output} src=tag2-v4 Should Match Regexp ${output} src=direct UDP logtail [Documentation] ipng_stats_logtail udp:// sends log lines to a local ... nc listener; captured file has all sources and VIPs. ${output} = Docker Exec ${SERVER} cat /var/log/nginx/logtail-udp.log - Should Match Regexp ${output} cl1 - Should Match Regexp ${output} cl2 + Should Match Regexp ${output} tag1 + Should Match Regexp ${output} tag2-v4 Should Match Regexp ${output} direct Should Match Regexp ${output} 10\\.0\\.1\\.1 Should Match Regexp ${output} 10\\.0\\.2\\.1 @@ -166,10 +186,10 @@ VIP in access log Counters survive reload [Documentation] Shared-memory zone persists across nginx -s reload. - ${before} = Get Request Count cl1 + ${before} = Get Request Count tag1 Docker Exec ${SERVER} nginx -s reload Sleep 2s Wait for new workers - ${after} = Get Request Count cl1 + ${after} = Get Request Count tag1 Should Be True ${after} >= ${before} ... 
Counters dropped after reload: before=${before} after=${after} @@ -177,24 +197,37 @@ Traffic after reload [Documentation] New requests are counted after reload. Send Fast Requests ${CLIENT1} 10.0.1.1 3 Wait For Flush - ${output} = Scrape With Filter source_tag=cl1 - Should Contain ${output} source_tag="cl1" + ${output} = Scrape With Filter source_tag=tag1 + Should Contain ${output} source_tag="tag1" # --- Counter correctness --- -Request count accuracy - [Documentation] 10 requests per client yields exactly 10 delta. - ${before_cl1} = Get Request Count cl1 - ${before_cl2} = Get Request Count cl2 - Send Fast Requests ${CLIENT1} 10.0.1.1 10 - Send Fast Requests ${CLIENT2} 10.0.2.1 10 +Per-(device, family) request count accuracy + [Documentation] 10 requests on each of the four (device, family) + ... combinations yields tag1=20, tag2-v4=10, tag2-v6=10. + ... Demonstrates that one device can combine v4+v6 under + ... a single tag while another device can split them. + ${before_tag1} = Get Request Count tag1 + ${before_tag2v4} = Get Request Count tag2-v4 + ${before_tag2v6} = Get Request Count tag2-v6 + + Send Fast Requests ${CLIENT1} 10.0.1.1 10 + Send Fast Requests v6 ${CLIENT1} 2001:db8:1::1 10 + Send Fast Requests ${CLIENT2} 10.0.2.1 10 + Send Fast Requests v6 ${CLIENT2} 2001:db8:2::1 10 Wait For Flush - ${after_cl1} = Get Request Count cl1 - ${after_cl2} = Get Request Count cl2 - ${delta_cl1} = Evaluate ${after_cl1} - ${before_cl1} - ${delta_cl2} = Evaluate ${after_cl2} - ${before_cl2} - Should Be Equal As Integers ${delta_cl1} 10 - Should Be Equal As Integers ${delta_cl2} 10 + + ${after_tag1} = Get Request Count tag1 + ${after_tag2v4} = Get Request Count tag2-v4 + ${after_tag2v6} = Get Request Count tag2-v6 + + ${delta_tag1} = Evaluate ${after_tag1} - ${before_tag1} + ${delta_tag2v4} = Evaluate ${after_tag2v4} - ${before_tag2v4} + ${delta_tag2v6} = Evaluate ${after_tag2v6} - ${before_tag2v6} + + Should Be Equal As Integers ${delta_tag1} 20 + Should Be Equal As 
Integers ${delta_tag2v4} 10 + Should Be Equal As Integers ${delta_tag2v6} 10 *** Keywords *** @@ -249,6 +282,12 @@ Send Fast Requests Docker Exec ${client} curl -sf http://${server_ip}:8080/ END +Send Fast Requests v6 + [Arguments] ${client} ${server_ip} ${count} + FOR ${i} IN RANGE ${count} + Docker Exec ${client} curl -sf http://[${server_ip}]:8080/ + END + Send Slow Requests [Arguments] ${client} ${server_ip} ${count} FOR ${i} IN RANGE ${count} diff --git a/tests/01-module/lab/client/start.sh b/tests/01-module/lab/client/start.sh index 7b1a4a3..4031f78 100644 --- a/tests/01-module/lab/client/start.sh +++ b/tests/01-module/lab/client/start.sh @@ -15,6 +15,9 @@ while ! ip link show eth1 > /dev/null 2>&1; do done ip link set eth1 up ip addr add ${MY_IP} dev eth1 +if [ -n "${MY_IP6}" ]; then + ip -6 addr add ${MY_IP6} dev eth1 nodad +fi # Remove the default route so packets to 10.0.x.0/24 go out eth1 # (the connected route) instead of through the mgmt bridge. diff --git a/tests/01-module/lab/ipng-stats.clab.yml b/tests/01-module/lab/ipng-stats.clab.yml index bb0d5bf..bbcceda 100644 --- a/tests/01-module/lab/ipng-stats.clab.yml +++ b/tests/01-module/lab/ipng-stats.clab.yml @@ -3,12 +3,13 @@ # # Three nodes: # server — nginx with the module, a slow Python backend, two data-plane interfaces -# client1 — sends traffic via eth1 (attributed to source_tag=cl1) -# client2 — sends traffic via eth2 (attributed to source_tag=cl2) +# client1 — sends traffic via eth1 (attributed to source_tag=tag1 for both v4 and v6) +# client2 — sends traffic via eth2 (attributed to source_tag=tag2-v4 for v4, +# tag2-v6 for v6 — demonstrates per-(device, family) attribution) # -# Links: -# server:eth1 ←→ client1:eth1 (10.0.1.0/24) -# server:eth2 ←→ client2:eth1 (10.0.2.0/24) +# Links (each carries a /24 and a /64): +# server:eth1 ←→ client1:eth1 (10.0.1.0/24 + 2001:db8:1::/64) +# server:eth2 ←→ client2:eth1 (10.0.2.0/24 + 2001:db8:2::/64) name: ipng-stats-test @@ -38,6 +39,7 @@ topology: cmd: 
bash /start.sh env: MY_IP: 10.0.1.2/24 + MY_IP6: 2001:db8:1::2/64 client2: kind: linux @@ -48,6 +50,7 @@ topology: cmd: bash /start.sh env: MY_IP: 10.0.2.2/24 + MY_IP6: 2001:db8:2::2/64 links: - endpoints: ["server:eth1", "client1:eth1"] diff --git a/tests/01-module/lab/server/nginx.conf b/tests/01-module/lab/server/nginx.conf index de3d84c..c67a97a 100644 --- a/tests/01-module/lab/server/nginx.conf +++ b/tests/01-module/lab/server/nginx.conf @@ -1,5 +1,17 @@ # SPDX-License-Identifier: Apache-2.0 # Test nginx configuration for the ipng_stats module. +# +# Data plane (port 8080) uses four wildcard listens — two address +# families × two devices — to exercise per-(device, family) +# attribution. eth1 uses the same tag (`tag1`) for IPv4 and IPv6, +# while eth2 splits them (`tag2-v4` / `tag2-v6`) so the e2e suite +# can verify that the module can either combine or distinguish +# families per device. +# +# Mgmt/direct traffic hits a separate server block on port 9180. +# Mixing a naked `listen 8080;` or a specific-address `listen +# 172.20.40.2:8080;` with device-tagged wildcards on the same port +# is not supported — see docs/user-guide.md. load_module /usr/lib/nginx/modules/ngx_http_ipng_stats_module.so; @@ -30,12 +42,13 @@ http { ipng_stats_logtail ipng_stats_logtail udp://127.0.0.1:9514 buffer=4k flush=500ms if=$logtail_enabled; server { - # Mgmt-only listener for direct traffic (tagged "direct"). - listen 172.20.40.2:8080; - - # Per-interface listeners for attributed traffic. - listen 10.0.1.1:8080 device=eth1 ipng_source_tag=cl1; - listen 10.0.2.1:8080 device=eth2 ipng_source_tag=cl2; + # Per-device wildcard listens. All four share port 8080; the + # kernel's SO_BINDTODEVICE filtering routes each incoming packet + # to the socket pinned to the interface it arrived on. 
+ listen 8080 device=eth1 ipng_source_tag=tag1; + listen [::]:8080 device=eth1 ipng_source_tag=tag1; + listen 8080 device=eth2 ipng_source_tag=tag2-v4; + listen [::]:8080 device=eth2 ipng_source_tag=tag2-v6; server_name _; @@ -52,6 +65,19 @@ http { } } + server { + # Direct (mgmt) traffic: no device binding on the listen, + # `ipng_stats_default_source direct;` therefore tags it "direct". + # Separate port so it doesn't collide with the device-tagged + # wildcards above. + listen 172.20.40.2:9180; + server_name _; + + location / { + return 200 "ok direct\n"; + } + } + server { listen 172.20.40.2:9113; diff --git a/tests/01-module/lab/server/start.sh b/tests/01-module/lab/server/start.sh index 2126d14..a84c57e 100644 --- a/tests/01-module/lab/server/start.sh +++ b/tests/01-module/lab/server/start.sh @@ -32,6 +32,8 @@ done ip addr add 10.0.1.1/24 dev eth1 ip addr add 10.0.2.1/24 dev eth2 +ip -6 addr add 2001:db8:1::1/64 dev eth1 nodad +ip -6 addr add 2001:db8:2::1/64 dev eth2 nodad # Slow backend: 50 ms sleep per request. python3 /opt/config/slow-backend.py &