Fix shared-listen-include pattern across multiple server blocks

v0.3.0's listen wrapper treated every listen beyond the first at a
given sockaddr as a skip-core "duplicate", which was correct for
two `listen 80 device=X/Y;` lines in one server block but broke on
the real deployment pattern where every server-*.conf pulls in the
same `include listens.conf;`. Symptoms:

  * every server block after the first ended up with no listen
    directive processed, so nginx assigned them the default
    `*:80`, producing a flood of "conflicting server name"
    warnings and attaching every server block to an unrelated
    wildcard bind;
  * the bindings list grew linearly with the number of server
    blocks, so init_module tried to create (server_blocks) ×
    (devices × families) listening sockets and hit EMFILE.

Replace the single dedup with two independent checks:

  * listens_seen is a (cscf, sockaddr) ledger. The core listen
    handler is invoked at most once per (server block, sockaddr),
    matching nginx's own duplicate check so server-block N just
    attaches its cscf to the existing address via
    ngx_http_add_server.

  * `bind` is added only for the first global occurrence of each
    sockaddr; subsequent cscfs inherit opt.set/opt.bind from the
    first, which is what keeps nginx's "duplicate listen options"
    check happy across server blocks.

  * bindings dedup on (sockaddr, device) globally, so init_module
    creates one socket per unique pair regardless of how many
    server blocks reference it.

Add a regression test at tests/01-module/ that wires three server
blocks to the same ipng-listens.inc and asserts that nginx -t is
clean and exactly four sockets are bound on port 8080.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-18 13:15:04 +02:00
parent ef821e577b
commit 1f144f4c19
5 changed files with 169 additions and 42 deletions

View File

@@ -200,6 +200,21 @@ typedef struct {
} ngx_http_ipng_stats_binding_t;
/* Per-(cscf, sockaddr) tracking used only during config parse. The
* listen wrapper uses it to avoid invoking the core `listen` handler
* twice for the same (server block, sockaddr) pair — a valid config
* with N device-tagged listens on the same port in the same server
* block generates exactly one core-handler call and one cscf
* attachment, with the remaining listens handled by init_module
* cloning. Across server blocks the same sockaddr legitimately recurs,
* and each distinct cscf gets its own core-handler call. */
typedef struct {
void *cscf; /* ngx_http_core_srv_conf_t * */
ngx_sockaddr_t sockaddr;
socklen_t socklen;
} ngx_http_ipng_stats_seen_t;
typedef struct {
ngx_shm_zone_t *shm_zone;
ngx_str_t zone_name;
@@ -211,6 +226,7 @@ typedef struct {
ngx_uint_t nbytebuckets;
ngx_uint_t *byte_bucket_bounds; /* len = nbytebuckets, bytes */
ngx_array_t *bindings; /* ngx_http_ipng_stats_binding_t */
ngx_array_t *listens_seen; /* ngx_http_ipng_stats_seen_t */
ngx_flag_t enabled;
/* Global logtail (FR-8) — UDP-only. */
@@ -509,26 +525,17 @@ ngx_http_ipng_stats_listen_wrapper(ngx_conf_t *cf, ngx_command_t *cmd,
return ngx_http_core_listen_orig(cf, cmd, conf);
}
/* Force nginx to bind a dedicated socket for this address rather
* than folding it into a wildcard. Without `bind`, nginx's listen
* optimizer discards specific-address entries covered by a
* wildcard, which prevents us from applying SO_BINDTODEVICE. */
ngx_str_t *bind_arg = ngx_array_push(cf->args);
if (bind_arg == NULL) {
return NGX_CONF_ERROR;
}
ngx_str_set(bind_arg, "bind");
/* Parse the listen address ourselves: we need the sockaddr to
* detect duplicates (multiple `listen 80 device=X` at the same
* addr) and to match bindings to cycle->listening[] entries in
* init_module. */
/* Parse the listen address ourselves so we can dedup on sockaddr.
* We need this before adding `bind` so we can decide whether to
* call the core handler at all. */
ngx_memzero(&u, sizeof(ngx_url_t));
u.url = value[1];
u.listen = 1;
u.default_port = 80;
if (ngx_parse_url(cf->pool, &u) != NGX_OK || u.naddrs == 0) {
/* Restore the stripped args by just passing through; nginx
* will produce its own parse error. */
return ngx_http_core_listen_orig(cf, cmd, conf);
}
@@ -541,36 +548,111 @@ ngx_http_ipng_stats_listen_wrapper(ngx_conf_t *cf, ngx_command_t *cmd,
return NGX_CONF_ERROR;
}
}
if (imcf->listens_seen == NULL) {
imcf->listens_seen = ngx_array_create(cf->pool, 16,
sizeof(ngx_http_ipng_stats_seen_t));
if (imcf->listens_seen == NULL) {
return NGX_CONF_ERROR;
}
}
/* Call the core handler at most once per (addr, port). Any
* subsequent listen at the same sockaddr would hit nginx's
* duplicate-listen check — we skip it and let init_module clone
* the first-seen listening entry for each duplicate. */
ngx_http_ipng_stats_binding_t *existing = imcf->bindings->elts;
ngx_uint_t dup = 0;
for (i = 0; i < imcf->bindings->nelts; i++) {
if (existing[i].socklen == u.addrs[0].socklen
&& ngx_cmp_sockaddr((struct sockaddr *) &existing[i].sockaddr,
existing[i].socklen,
/* Decide whether to call the core handler. nginx rejects a
* repeat of (sockaddr, cscf), but accepts the same sockaddr in a
* different server block and accepts different sockaddrs in the
* same server block. Track the (cscf, sockaddr) pairs we've
* already handed off. */
void *cscf = ngx_http_conf_get_module_srv_conf(cf, ngx_http_core_module);
ngx_http_ipng_stats_seen_t *seen = imcf->listens_seen->elts;
ngx_uint_t same_cscf_sockaddr = 0;
for (i = 0; i < imcf->listens_seen->nelts; i++) {
if (seen[i].cscf != cscf) continue;
if (seen[i].socklen != u.addrs[0].socklen) continue;
if (ngx_cmp_sockaddr((struct sockaddr *) &seen[i].sockaddr,
seen[i].socklen,
u.addrs[0].sockaddr,
u.addrs[0].socklen, 1) == NGX_OK)
{
dup = 1;
same_cscf_sockaddr = 1;
break;
}
}
if (!dup) {
if (!same_cscf_sockaddr) {
/* First time this (cscf, sockaddr) pair is seen. If this
* sockaddr has never been bound at all, inject `bind` so
* nginx allocates a dedicated listening socket for it (and
* sets opt.set=1 / opt.bind=1). For subsequent cscfs that
* reference the same sockaddr, the address entry already
* exists with opt.set=1; passing `bind` again would trip
* nginx's "duplicate listen options" check, so we skip it
* and let nginx's cross-server-block merge attach this cscf
* via ngx_http_add_server. */
ngx_http_ipng_stats_binding_t *bs = imcf->bindings->elts;
ngx_uint_t sockaddr_ever_seen = 0;
for (i = 0; i < imcf->bindings->nelts; i++) {
if (bs[i].socklen == u.addrs[0].socklen
&& ngx_cmp_sockaddr((struct sockaddr *) &bs[i].sockaddr,
bs[i].socklen,
u.addrs[0].sockaddr,
u.addrs[0].socklen, 1) == NGX_OK)
{
sockaddr_ever_seen = 1;
break;
}
}
if (!sockaddr_ever_seen) {
ngx_str_t *bind_arg = ngx_array_push(cf->args);
if (bind_arg == NULL) {
return NGX_CONF_ERROR;
}
ngx_str_set(bind_arg, "bind");
}
rv = ngx_http_core_listen_orig(cf, cmd, conf);
if (rv != NGX_CONF_OK) {
return rv;
}
ngx_http_ipng_stats_seen_t *s = ngx_array_push(imcf->listens_seen);
if (s == NULL) return NGX_CONF_ERROR;
s->cscf = cscf;
s->socklen = u.addrs[0].socklen;
ngx_memcpy(&s->sockaddr, u.addrs[0].sockaddr, u.addrs[0].socklen);
}
/* Record one binding per resolved address (ngx_parse_url may yield
* multiple for a hostname; listen specs use literal addresses so
* naddrs is almost always 1). */
/* Dedup bindings on (sockaddr, device) globally: two server
* blocks that share the same `include` only produce one binding
* per (sockaddr, device) pair. init_module creates one socket per
* unique binding regardless of how many server blocks referenced
* it. */
for (i = 0; i < u.naddrs; i++) {
ngx_http_ipng_stats_binding_t *existing = imcf->bindings->elts;
ngx_uint_t j;
ngx_uint_t dup_binding = 0;
ngx_uint_t sockaddr_reused = 0;
for (j = 0; j < imcf->bindings->nelts; j++) {
if (existing[j].socklen == u.addrs[i].socklen
&& ngx_cmp_sockaddr((struct sockaddr *) &existing[j].sockaddr,
existing[j].socklen,
u.addrs[i].sockaddr,
u.addrs[i].socklen, 1) == NGX_OK)
{
sockaddr_reused = 1;
if (existing[j].device.len == device.len
&& (device.len == 0
|| ngx_memcmp(existing[j].device.data, device.data,
device.len) == 0))
{
dup_binding = 1;
break;
}
}
}
if (dup_binding) continue;
b = ngx_array_push(imcf->bindings);
if (b == NULL) {
return NGX_CONF_ERROR;
@@ -599,7 +681,7 @@ ngx_http_ipng_stats_listen_wrapper(ngx_conf_t *cf, ngx_command_t *cmd,
b->socklen = u.addrs[i].socklen;
ngx_memcpy(&b->sockaddr, u.addrs[i].sockaddr, u.addrs[i].socklen);
b->needs_clone = dup ? 1 : 0;
b->needs_clone = sockaddr_reused ? 1 : 0;
}
return NGX_CONF_OK;

View File

@@ -29,6 +29,23 @@ Module loads
${output} = Docker Exec ${SERVER} nginx -t 2>&1
Should Contain ${output} syntax is ok
Shared-listen-include across multiple server blocks
[Documentation] Three server blocks all pull in the same
... ipng-listens.inc (see docs/user-guide.md). nginx
... must start without "conflicting server name" or
... "duplicate listen options" warnings, and the
... module must end up with exactly one listening
... socket per (device, family) pair — not one per
... (server block × device × family), which would
... exhaust the fd table on a real host.
${output} = Docker Exec ${SERVER} nginx -t 2>&1
Should Not Contain ${output} conflicting server name
Should Not Contain ${output} duplicate listen
${listens} = Docker Exec ${SERVER} ss -tlnH
${count} = Get Regexp Matches ${listens} :8080\\s
Length Should Be ${count} 4
... Expected 4 listening sockets on port 8080 (v4+v6 × eth1+eth2); got ${count}
Prometheus scrape
[Documentation] Scrape returns HELP/TYPE preamble.
${output} = Scrape Prometheus

View File

@@ -26,6 +26,7 @@ topology:
binds:
- ../../../build:/opt/build:ro
- ./server/nginx.conf:/opt/config/nginx.conf:ro
- ./server/ipng-listens.inc:/opt/config/ipng-listens.inc:ro
- ./server/slow-backend.py:/opt/config/slow-backend.py:ro
- ./server/start.sh:/start.sh:ro
cmd: bash /start.sh

View File

@@ -0,0 +1,12 @@
# SPDX-License-Identifier: Apache-2.0
# Shared listen include — matches the deployment pattern from
# docs/user-guide.md, where every server block pulls in the same set of
# device-tagged wildcard listens. The 01-module suite includes this
# file from multiple server blocks to exercise the wrapper's dedup
# logic: a naive implementation would either error with "duplicate
# listen options" or create N * (devices × families) sockets.
listen 8080 device=eth1 ipng_source_tag=tag1;
listen [::]:8080 device=eth1 ipng_source_tag=tag1;
listen 8080 device=eth2 ipng_source_tag=tag2-v4;
listen [::]:8080 device=eth2 ipng_source_tag=tag2-v6;

View File

@@ -41,30 +41,45 @@ http {
'$ipng_source_tag\t$server_addr\t$scheme';
ipng_stats_logtail ipng_stats_logtail udp://127.0.0.1:9514 buffer=4k flush=500ms if=$logtail_enabled;
# Three server blocks that all pull in the same listen include —
# mirrors the real-world pattern where every site-*.conf has the
# same `include listens.conf;`. The wrapper must:
# * invoke nginx's listen handler exactly once per (server, addr)
# pair, so each server block gets its own cscf attached but no
# server block triggers nginx's "duplicate listen options"
# check;
# * dedup bindings globally on (sockaddr, device), so init_module
# creates exactly four sockets here (two families × two
# devices) rather than 3 × 4 = 12.
# The default server owns the locations used by the traffic tests;
# the two extras exist only to exercise the shared-include pattern.
server {
# Per-device wildcard listens. All four share port 8080; the
# kernel's SO_BINDTODEVICE filtering routes each incoming packet
# to the socket pinned to the interface it arrived on.
listen 8080 device=eth1 ipng_source_tag=tag1;
listen [::]:8080 device=eth1 ipng_source_tag=tag1;
listen 8080 device=eth2 ipng_source_tag=tag2-v4;
listen [::]:8080 device=eth2 ipng_source_tag=tag2-v6;
include /opt/config/ipng-listens.inc;
server_name _;
location / {
return 200 "ok $server_addr\n";
}
location /notfound {
return 404 "nope\n";
}
location /slow {
proxy_pass http://127.0.0.1:29080/;
}
}
server {
include /opt/config/ipng-listens.inc;
server_name extra-a.test;
location / { return 200 "a\n"; }
}
server {
include /opt/config/ipng-listens.inc;
server_name extra-b.test;
location / { return 200 "b\n"; }
}
server {
# Direct (mgmt) traffic: no device binding on the listen,
# `ipng_stats_default_source direct;` therefore tags it "direct".