From 1f144f4c190c64c5e29f29f4fcc18b7297ce2153 Mon Sep 17 00:00:00 2001 From: Pim van Pelt Date: Sat, 18 Apr 2026 13:15:04 +0200 Subject: [PATCH] Fix shared-listen-include pattern across multiple server blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v0.3.0's listen wrapper treated every listen beyond the first at a given sockaddr as a skip-core "duplicate", which was correct for two `listen 80 device=X/Y;` lines in one server block but broke on the real deployment pattern where every server-*.conf pulls in the same `include listens.conf;`. Symptoms: * every server block after the first ended up with no listen directive processed, so nginx assigned them the default `*:80`, producing a flood of "conflicting server name" warnings and attaching every server block to an unrelated wildcard bind; * the bindings list grew linearly with the number of server blocks, so init_module tried to create (server_blocks) × (devices × families) listening sockets and hit EMFILE. Replace the single dedup with two independent checks: * listens_seen is a (cscf, sockaddr) ledger. The core listen handler is invoked at most once per (server block, sockaddr), matching nginx's own duplicate check so server-block N just attaches its cscf to the existing address via ngx_http_add_server. * `bind` is added only for the first global occurrence of each sockaddr; subsequent cscfs inherit opt.set/opt.bind from the first, which is what keeps nginx's "duplicate listen options" check happy across server blocks. * bindings dedup on (sockaddr, device) globally, so init_module creates one socket per unique pair regardless of how many server blocks reference it. Add a regression test at tests/01-module/ that wires three server blocks to the same ipng-listens.inc and asserts that nginx -t is clean and exactly four sockets are bound on port 8080. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ngx_http_ipng_stats_module.c | 146 +++++++++++++++----- tests/01-module/01-e2e.robot | 17 +++ tests/01-module/lab/ipng-stats.clab.yml | 1 + tests/01-module/lab/server/ipng-listens.inc | 12 ++ tests/01-module/lab/server/nginx.conf | 35 +++-- 5 files changed, 169 insertions(+), 42 deletions(-) create mode 100644 tests/01-module/lab/server/ipng-listens.inc diff --git a/src/ngx_http_ipng_stats_module.c b/src/ngx_http_ipng_stats_module.c index 6a0d7b3..9681d1a 100644 --- a/src/ngx_http_ipng_stats_module.c +++ b/src/ngx_http_ipng_stats_module.c @@ -200,6 +200,21 @@ typedef struct { } ngx_http_ipng_stats_binding_t; +/* Per-(cscf, sockaddr) tracking used only during config parse. The + * listen wrapper uses it to avoid invoking the core `listen` handler + * twice for the same (server block, sockaddr) pair — a valid config + * with N device-tagged listens on the same port in the same server + * block generates exactly one core-handler call and one cscf + * attachment, with the remaining listens handled by init_module + * cloning. Across server blocks the same sockaddr legitimately recurs, + * and each distinct cscf gets its own core-handler call. */ +typedef struct { + void *cscf; /* ngx_http_core_srv_conf_t * */ + ngx_sockaddr_t sockaddr; + socklen_t socklen; +} ngx_http_ipng_stats_seen_t; + + typedef struct { ngx_shm_zone_t *shm_zone; ngx_str_t zone_name; @@ -211,6 +226,7 @@ typedef struct { ngx_uint_t nbytebuckets; ngx_uint_t *byte_bucket_bounds; /* len = nbytebuckets, bytes */ ngx_array_t *bindings; /* ngx_http_ipng_stats_binding_t */ + ngx_array_t *listens_seen; /* ngx_http_ipng_stats_seen_t */ ngx_flag_t enabled; /* Global logtail (FR-8) — UDP-only. */ @@ -509,26 +525,17 @@ ngx_http_ipng_stats_listen_wrapper(ngx_conf_t *cf, ngx_command_t *cmd, return ngx_http_core_listen_orig(cf, cmd, conf); } - /* Force nginx to bind a dedicated socket for this address rather - * than folding it into a wildcard. Without `bind`, nginx's listen - * optimizer discards specific-address entries covered by a - * wildcard, which prevents us from applying SO_BINDTODEVICE. */ - ngx_str_t *bind_arg = ngx_array_push(cf->args); - if (bind_arg == NULL) { - return NGX_CONF_ERROR; - } - ngx_str_set(bind_arg, "bind"); - - /* Parse the listen address ourselves: we need the sockaddr to - * detect duplicates (multiple `listen 80 device=X` at the same - * addr) and to match bindings to cycle->listening[] entries in - * init_module. */ + /* Parse the listen address ourselves so we can dedup on sockaddr. + * We need this before adding `bind` so we can decide whether to + * call the core handler at all. */ ngx_memzero(&u, sizeof(ngx_url_t)); u.url = value[1]; u.listen = 1; u.default_port = 80; if (ngx_parse_url(cf->pool, &u) != NGX_OK || u.naddrs == 0) { + /* Restore the stripped args by just passing through; nginx + * will produce its own parse error. */ return ngx_http_core_listen_orig(cf, cmd, conf); } @@ -541,36 +548,111 @@ ngx_http_ipng_stats_listen_wrapper(ngx_conf_t *cf, ngx_command_t *cmd, return NGX_CONF_ERROR; } } + if (imcf->listens_seen == NULL) { + imcf->listens_seen = ngx_array_create(cf->pool, 16, + sizeof(ngx_http_ipng_stats_seen_t)); + if (imcf->listens_seen == NULL) { + return NGX_CONF_ERROR; + } + } - /* Call the core handler at most once per (addr, port). Any - * subsequent listen at the same sockaddr would hit nginx's - * duplicate-listen check — we skip it and let init_module clone - * the first-seen listening entry for each duplicate. */ - ngx_http_ipng_stats_binding_t *existing = imcf->bindings->elts; - ngx_uint_t dup = 0; - for (i = 0; i < imcf->bindings->nelts; i++) { - if (existing[i].socklen == u.addrs[0].socklen - && ngx_cmp_sockaddr((struct sockaddr *) &existing[i].sockaddr, - existing[i].socklen, - u.addrs[0].sockaddr, - u.addrs[0].socklen, 1) == NGX_OK) + /* Decide whether to call the core handler. nginx rejects a + * repeat of (sockaddr, cscf), but accepts the same sockaddr in a + * different server block and accepts different sockaddrs in the + * same server block. Track the (cscf, sockaddr) pairs we've + * already handed off. */ + void *cscf = ngx_http_conf_get_module_srv_conf(cf, ngx_http_core_module); + ngx_http_ipng_stats_seen_t *seen = imcf->listens_seen->elts; + ngx_uint_t same_cscf_sockaddr = 0; + for (i = 0; i < imcf->listens_seen->nelts; i++) { + if (seen[i].cscf != cscf) continue; + if (seen[i].socklen != u.addrs[0].socklen) continue; + if (ngx_cmp_sockaddr((struct sockaddr *) &seen[i].sockaddr, + seen[i].socklen, + u.addrs[0].sockaddr, + u.addrs[0].socklen, 1) == NGX_OK) { - dup = 1; + same_cscf_sockaddr = 1; break; } } - if (!dup) { + if (!same_cscf_sockaddr) { + /* First time this (cscf, sockaddr) pair is seen. If this + * sockaddr has never been bound at all, inject `bind` so + * nginx allocates a dedicated listening socket for it (and + * sets opt.set=1 / opt.bind=1). For subsequent cscfs that + * reference the same sockaddr, the address entry already + * exists with opt.set=1; passing `bind` again would trip + * nginx's "duplicate listen options" check, so we skip it + * and let nginx's cross-server-block merge attach this cscf + * via ngx_http_add_server. */ + ngx_http_ipng_stats_binding_t *bs = imcf->bindings->elts; + ngx_uint_t sockaddr_ever_seen = 0; + for (i = 0; i < imcf->bindings->nelts; i++) { + if (bs[i].socklen == u.addrs[0].socklen + && ngx_cmp_sockaddr((struct sockaddr *) &bs[i].sockaddr, + bs[i].socklen, + u.addrs[0].sockaddr, + u.addrs[0].socklen, 1) == NGX_OK) + { + sockaddr_ever_seen = 1; + break; + } + } + + if (!sockaddr_ever_seen) { + ngx_str_t *bind_arg = ngx_array_push(cf->args); + if (bind_arg == NULL) { + return NGX_CONF_ERROR; + } + ngx_str_set(bind_arg, "bind"); + } + rv = ngx_http_core_listen_orig(cf, cmd, conf); if (rv != NGX_CONF_OK) { return rv; } + + ngx_http_ipng_stats_seen_t *s = ngx_array_push(imcf->listens_seen); + if (s == NULL) return NGX_CONF_ERROR; + s->cscf = cscf; + s->socklen = u.addrs[0].socklen; + ngx_memcpy(&s->sockaddr, u.addrs[0].sockaddr, u.addrs[0].socklen); } - /* Record one binding per resolved address (ngx_parse_url may yield - * multiple for a hostname; listen specs use literal addresses so - * naddrs is almost always 1). */ + /* Dedup bindings on (sockaddr, device) globally: two server + * blocks that share the same `include` only produce one binding + * per (sockaddr, device) pair. init_module creates one socket per + * unique binding regardless of how many server blocks referenced + * it. */ for (i = 0; i < u.naddrs; i++) { + ngx_http_ipng_stats_binding_t *existing = imcf->bindings->elts; + ngx_uint_t j; + ngx_uint_t dup_binding = 0; + ngx_uint_t sockaddr_reused = 0; + + for (j = 0; j < imcf->bindings->nelts; j++) { + if (existing[j].socklen == u.addrs[i].socklen + && ngx_cmp_sockaddr((struct sockaddr *) &existing[j].sockaddr, + existing[j].socklen, + u.addrs[i].sockaddr, + u.addrs[i].socklen, 1) == NGX_OK) + { + sockaddr_reused = 1; + if (existing[j].device.len == device.len + && (device.len == 0 + || ngx_memcmp(existing[j].device.data, device.data, + device.len) == 0)) + { + dup_binding = 1; + break; + } + } + } + + if (dup_binding) continue; + b = ngx_array_push(imcf->bindings); if (b == NULL) { return NGX_CONF_ERROR; @@ -599,7 +681,7 @@ ngx_http_ipng_stats_listen_wrapper(ngx_conf_t *cf, ngx_command_t *cmd, b->socklen = u.addrs[i].socklen; ngx_memcpy(&b->sockaddr, u.addrs[i].sockaddr, u.addrs[i].socklen); - b->needs_clone = dup ? 1 : 0; + b->needs_clone = sockaddr_reused ? 1 : 0; } return NGX_CONF_OK; diff --git a/tests/01-module/01-e2e.robot b/tests/01-module/01-e2e.robot index 3036f4b..6a0b8f4 100644 --- a/tests/01-module/01-e2e.robot +++ b/tests/01-module/01-e2e.robot @@ -29,6 +29,23 @@ Module loads ${output} = Docker Exec ${SERVER} nginx -t 2>&1 Should Contain ${output} syntax is ok +Shared-listen-include across multiple server blocks + [Documentation] Three server blocks all pull in the same + ... ipng-listens.inc (see docs/user-guide.md). nginx + ... must start without "conflicting server name" or + ... "duplicate listen options" warnings, and the + ... module must end up with exactly one listening + ... socket per (device, family) pair — not one per + ... (server block × device × family), which would + ... exhaust the fd table on a real host. + ${output} = Docker Exec ${SERVER} nginx -t 2>&1 + Should Not Contain ${output} conflicting server name + Should Not Contain ${output} duplicate listen + ${listens} = Docker Exec ${SERVER} ss -tlnH + ${count} = Get Regexp Matches ${listens} :8080\\s + Length Should Be ${count} 4 + ... Expected 4 listening sockets on port 8080 (v4+v6 × eth1+eth2); got ${count} + Prometheus scrape [Documentation] Scrape returns HELP/TYPE preamble. ${output} = Scrape Prometheus diff --git a/tests/01-module/lab/ipng-stats.clab.yml b/tests/01-module/lab/ipng-stats.clab.yml index bbcceda..b4a00df 100644 --- a/tests/01-module/lab/ipng-stats.clab.yml +++ b/tests/01-module/lab/ipng-stats.clab.yml @@ -26,6 +26,7 @@ topology: binds: - ../../../build:/opt/build:ro - ./server/nginx.conf:/opt/config/nginx.conf:ro + - ./server/ipng-listens.inc:/opt/config/ipng-listens.inc:ro - ./server/slow-backend.py:/opt/config/slow-backend.py:ro - ./server/start.sh:/start.sh:ro cmd: bash /start.sh diff --git a/tests/01-module/lab/server/ipng-listens.inc b/tests/01-module/lab/server/ipng-listens.inc new file mode 100644 index 0000000..ce657df --- /dev/null +++ b/tests/01-module/lab/server/ipng-listens.inc @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: Apache-2.0 +# Shared listen include — matches the deployment pattern from +# docs/user-guide.md, where every server block pulls in the same set of +# device-tagged wildcard listens. The 01-module suite includes this +# file from multiple server blocks to exercise the wrapper's dedup +# logic: a naive implementation would either error with "duplicate +# listen options" or create N * (devices × families) sockets. + +listen 8080 device=eth1 ipng_source_tag=tag1; +listen [::]:8080 device=eth1 ipng_source_tag=tag1; +listen 8080 device=eth2 ipng_source_tag=tag2-v4; +listen [::]:8080 device=eth2 ipng_source_tag=tag2-v6; diff --git a/tests/01-module/lab/server/nginx.conf b/tests/01-module/lab/server/nginx.conf index c67a97a..a41b82c 100644 --- a/tests/01-module/lab/server/nginx.conf +++ b/tests/01-module/lab/server/nginx.conf @@ -41,30 +41,45 @@ http { '$ipng_source_tag\t$server_addr\t$scheme'; ipng_stats_logtail ipng_stats_logtail udp://127.0.0.1:9514 buffer=4k flush=500ms if=$logtail_enabled; + # Three server blocks that all pull in the same listen include — + # mirrors the real-world pattern where every site-*.conf has the + # same `include listens.conf;`. The wrapper must: + # * invoke nginx's listen handler exactly once per (server, addr) + # pair, so each server block gets its own cscf attached but no + # server block triggers nginx's "duplicate listen options" + # check; + # * dedup bindings globally on (sockaddr, device), so init_module + # creates exactly four sockets here (two families × two + # devices) rather than 3 × 4 = 12. + # The default server owns the locations used by the traffic tests; + # the two extras exist only to exercise the shared-include pattern. server { - # Per-device wildcard listens. All four share port 8080; the - # kernel's SO_BINDTODEVICE filtering routes each incoming packet - # to the socket pinned to the interface it arrived on. - listen 8080 device=eth1 ipng_source_tag=tag1; - listen [::]:8080 device=eth1 ipng_source_tag=tag1; - listen 8080 device=eth2 ipng_source_tag=tag2-v4; - listen [::]:8080 device=eth2 ipng_source_tag=tag2-v6; - + include /opt/config/ipng-listens.inc; server_name _; location / { return 200 "ok $server_addr\n"; } - location /notfound { return 404 "nope\n"; } - location /slow { proxy_pass http://127.0.0.1:29080/; } } + server { + include /opt/config/ipng-listens.inc; + server_name extra-a.test; + location / { return 200 "a\n"; } + } + + server { + include /opt/config/ipng-listens.inc; + server_name extra-b.test; + location / { return 200 "b\n"; } + } + server { # Direct (mgmt) traffic: no device binding on the listen, # `ipng_stats_default_source direct;` therefore tags it "direct".