Fix shared-listen-include pattern across multiple server blocks
v0.3.0's listen wrapper treated every listen beyond the first at a
given sockaddr as a skip-core "duplicate", which was correct for
two `listen 80 device=X/Y;` lines in one server block but broke on
the real deployment pattern where every server-*.conf pulls in the
same `include listens.conf;`. Symptoms:
* every server block after the first ended up with no listen
directive processed, so nginx assigned them the default
`*:80`, producing a flood of "conflicting server name"
warnings and attaching every server block to an unrelated
wildcard bind;
* the bindings list grew linearly with the number of server
blocks, so init_module tried to create (server_blocks) ×
(devices × families) listening sockets and hit EMFILE.
Replace the single dedup with two independent checks:
* listens_seen is a (cscf, sockaddr) ledger. The core listen
handler is invoked at most once per (server block, sockaddr),
matching nginx's own duplicate check so server-block N just
attaches its cscf to the existing address via
ngx_http_add_server.
* `bind` is added only for the first global occurrence of each
sockaddr; subsequent cscfs inherit opt.set/opt.bind from the
first, which is what keeps nginx's "duplicate listen options"
check happy across server blocks.
* bindings dedup on (sockaddr, device) globally, so init_module
creates one socket per unique pair regardless of how many
server blocks reference it.
Add a regression test at tests/01-module/ that wires three server
blocks to the same ipng-listens.inc and asserts that nginx -t is
clean and exactly four sockets are bound on port 8080.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -200,6 +200,21 @@ typedef struct {
|
||||
} ngx_http_ipng_stats_binding_t;
|
||||
|
||||
|
||||
/* Per-(cscf, sockaddr) tracking used only during config parse. The
|
||||
* listen wrapper uses it to avoid invoking the core `listen` handler
|
||||
* twice for the same (server block, sockaddr) pair — a valid config
|
||||
* with N device-tagged listens on the same port in the same server
|
||||
* block generates exactly one core-handler call and one cscf
|
||||
* attachment, with the remaining listens handled by init_module
|
||||
* cloning. Across server blocks the same sockaddr legitimately recurs,
|
||||
* and each distinct cscf gets its own core-handler call. */
|
||||
typedef struct {
|
||||
void *cscf; /* ngx_http_core_srv_conf_t * */
|
||||
ngx_sockaddr_t sockaddr;
|
||||
socklen_t socklen;
|
||||
} ngx_http_ipng_stats_seen_t;
|
||||
|
||||
|
||||
typedef struct {
|
||||
ngx_shm_zone_t *shm_zone;
|
||||
ngx_str_t zone_name;
|
||||
@@ -211,6 +226,7 @@ typedef struct {
|
||||
ngx_uint_t nbytebuckets;
|
||||
ngx_uint_t *byte_bucket_bounds; /* len = nbytebuckets, bytes */
|
||||
ngx_array_t *bindings; /* ngx_http_ipng_stats_binding_t */
|
||||
ngx_array_t *listens_seen; /* ngx_http_ipng_stats_seen_t */
|
||||
ngx_flag_t enabled;
|
||||
|
||||
/* Global logtail (FR-8) — UDP-only. */
|
||||
@@ -509,26 +525,17 @@ ngx_http_ipng_stats_listen_wrapper(ngx_conf_t *cf, ngx_command_t *cmd,
|
||||
return ngx_http_core_listen_orig(cf, cmd, conf);
|
||||
}
|
||||
|
||||
/* Force nginx to bind a dedicated socket for this address rather
|
||||
* than folding it into a wildcard. Without `bind`, nginx's listen
|
||||
* optimizer discards specific-address entries covered by a
|
||||
* wildcard, which prevents us from applying SO_BINDTODEVICE. */
|
||||
ngx_str_t *bind_arg = ngx_array_push(cf->args);
|
||||
if (bind_arg == NULL) {
|
||||
return NGX_CONF_ERROR;
|
||||
}
|
||||
ngx_str_set(bind_arg, "bind");
|
||||
|
||||
/* Parse the listen address ourselves: we need the sockaddr to
|
||||
* detect duplicates (multiple `listen 80 device=X` at the same
|
||||
* addr) and to match bindings to cycle->listening[] entries in
|
||||
* init_module. */
|
||||
/* Parse the listen address ourselves so we can dedup on sockaddr.
|
||||
* We need this before adding `bind` so we can decide whether to
|
||||
* call the core handler at all. */
|
||||
ngx_memzero(&u, sizeof(ngx_url_t));
|
||||
u.url = value[1];
|
||||
u.listen = 1;
|
||||
u.default_port = 80;
|
||||
|
||||
if (ngx_parse_url(cf->pool, &u) != NGX_OK || u.naddrs == 0) {
|
||||
/* Restore the stripped args by just passing through; nginx
|
||||
* will produce its own parse error. */
|
||||
return ngx_http_core_listen_orig(cf, cmd, conf);
|
||||
}
|
||||
|
||||
@@ -541,36 +548,111 @@ ngx_http_ipng_stats_listen_wrapper(ngx_conf_t *cf, ngx_command_t *cmd,
|
||||
return NGX_CONF_ERROR;
|
||||
}
|
||||
}
|
||||
if (imcf->listens_seen == NULL) {
|
||||
imcf->listens_seen = ngx_array_create(cf->pool, 16,
|
||||
sizeof(ngx_http_ipng_stats_seen_t));
|
||||
if (imcf->listens_seen == NULL) {
|
||||
return NGX_CONF_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/* Call the core handler at most once per (addr, port). Any
|
||||
* subsequent listen at the same sockaddr would hit nginx's
|
||||
* duplicate-listen check — we skip it and let init_module clone
|
||||
* the first-seen listening entry for each duplicate. */
|
||||
ngx_http_ipng_stats_binding_t *existing = imcf->bindings->elts;
|
||||
ngx_uint_t dup = 0;
|
||||
for (i = 0; i < imcf->bindings->nelts; i++) {
|
||||
if (existing[i].socklen == u.addrs[0].socklen
|
||||
&& ngx_cmp_sockaddr((struct sockaddr *) &existing[i].sockaddr,
|
||||
existing[i].socklen,
|
||||
u.addrs[0].sockaddr,
|
||||
u.addrs[0].socklen, 1) == NGX_OK)
|
||||
/* Decide whether to call the core handler. nginx rejects a
|
||||
* repeat of (sockaddr, cscf), but accepts the same sockaddr in a
|
||||
* different server block and accepts different sockaddrs in the
|
||||
* same server block. Track the (cscf, sockaddr) pairs we've
|
||||
* already handed off. */
|
||||
void *cscf = ngx_http_conf_get_module_srv_conf(cf, ngx_http_core_module);
|
||||
ngx_http_ipng_stats_seen_t *seen = imcf->listens_seen->elts;
|
||||
ngx_uint_t same_cscf_sockaddr = 0;
|
||||
for (i = 0; i < imcf->listens_seen->nelts; i++) {
|
||||
if (seen[i].cscf != cscf) continue;
|
||||
if (seen[i].socklen != u.addrs[0].socklen) continue;
|
||||
if (ngx_cmp_sockaddr((struct sockaddr *) &seen[i].sockaddr,
|
||||
seen[i].socklen,
|
||||
u.addrs[0].sockaddr,
|
||||
u.addrs[0].socklen, 1) == NGX_OK)
|
||||
{
|
||||
dup = 1;
|
||||
same_cscf_sockaddr = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!dup) {
|
||||
if (!same_cscf_sockaddr) {
|
||||
/* First time this (cscf, sockaddr) pair is seen. If this
|
||||
* sockaddr has never been bound at all, inject `bind` so
|
||||
* nginx allocates a dedicated listening socket for it (and
|
||||
* sets opt.set=1 / opt.bind=1). For subsequent cscfs that
|
||||
* reference the same sockaddr, the address entry already
|
||||
* exists with opt.set=1; passing `bind` again would trip
|
||||
* nginx's "duplicate listen options" check, so we skip it
|
||||
* and let nginx's cross-server-block merge attach this cscf
|
||||
* via ngx_http_add_server. */
|
||||
ngx_http_ipng_stats_binding_t *bs = imcf->bindings->elts;
|
||||
ngx_uint_t sockaddr_ever_seen = 0;
|
||||
for (i = 0; i < imcf->bindings->nelts; i++) {
|
||||
if (bs[i].socklen == u.addrs[0].socklen
|
||||
&& ngx_cmp_sockaddr((struct sockaddr *) &bs[i].sockaddr,
|
||||
bs[i].socklen,
|
||||
u.addrs[0].sockaddr,
|
||||
u.addrs[0].socklen, 1) == NGX_OK)
|
||||
{
|
||||
sockaddr_ever_seen = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!sockaddr_ever_seen) {
|
||||
ngx_str_t *bind_arg = ngx_array_push(cf->args);
|
||||
if (bind_arg == NULL) {
|
||||
return NGX_CONF_ERROR;
|
||||
}
|
||||
ngx_str_set(bind_arg, "bind");
|
||||
}
|
||||
|
||||
rv = ngx_http_core_listen_orig(cf, cmd, conf);
|
||||
if (rv != NGX_CONF_OK) {
|
||||
return rv;
|
||||
}
|
||||
|
||||
ngx_http_ipng_stats_seen_t *s = ngx_array_push(imcf->listens_seen);
|
||||
if (s == NULL) return NGX_CONF_ERROR;
|
||||
s->cscf = cscf;
|
||||
s->socklen = u.addrs[0].socklen;
|
||||
ngx_memcpy(&s->sockaddr, u.addrs[0].sockaddr, u.addrs[0].socklen);
|
||||
}
|
||||
|
||||
/* Record one binding per resolved address (ngx_parse_url may yield
|
||||
* multiple for a hostname; listen specs use literal addresses so
|
||||
* naddrs is almost always 1). */
|
||||
/* Dedup bindings on (sockaddr, device) globally: two server
|
||||
* blocks that share the same `include` only produce one binding
|
||||
* per (sockaddr, device) pair. init_module creates one socket per
|
||||
* unique binding regardless of how many server blocks referenced
|
||||
* it. */
|
||||
for (i = 0; i < u.naddrs; i++) {
|
||||
ngx_http_ipng_stats_binding_t *existing = imcf->bindings->elts;
|
||||
ngx_uint_t j;
|
||||
ngx_uint_t dup_binding = 0;
|
||||
ngx_uint_t sockaddr_reused = 0;
|
||||
|
||||
for (j = 0; j < imcf->bindings->nelts; j++) {
|
||||
if (existing[j].socklen == u.addrs[i].socklen
|
||||
&& ngx_cmp_sockaddr((struct sockaddr *) &existing[j].sockaddr,
|
||||
existing[j].socklen,
|
||||
u.addrs[i].sockaddr,
|
||||
u.addrs[i].socklen, 1) == NGX_OK)
|
||||
{
|
||||
sockaddr_reused = 1;
|
||||
if (existing[j].device.len == device.len
|
||||
&& (device.len == 0
|
||||
|| ngx_memcmp(existing[j].device.data, device.data,
|
||||
device.len) == 0))
|
||||
{
|
||||
dup_binding = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dup_binding) continue;
|
||||
|
||||
b = ngx_array_push(imcf->bindings);
|
||||
if (b == NULL) {
|
||||
return NGX_CONF_ERROR;
|
||||
@@ -599,7 +681,7 @@ ngx_http_ipng_stats_listen_wrapper(ngx_conf_t *cf, ngx_command_t *cmd,
|
||||
|
||||
b->socklen = u.addrs[i].socklen;
|
||||
ngx_memcpy(&b->sockaddr, u.addrs[i].sockaddr, u.addrs[i].socklen);
|
||||
b->needs_clone = dup ? 1 : 0;
|
||||
b->needs_clone = sockaddr_reused ? 1 : 0;
|
||||
}
|
||||
|
||||
return NGX_CONF_OK;
|
||||
|
||||
@@ -29,6 +29,23 @@ Module loads
|
||||
${output} = Docker Exec ${SERVER} nginx -t 2>&1
|
||||
Should Contain ${output} syntax is ok
|
||||
|
||||
Shared-listen-include across multiple server blocks
|
||||
[Documentation] Three server blocks all pull in the same
|
||||
... ipng-listens.inc (see docs/user-guide.md). nginx
|
||||
... must start without "conflicting server name" or
|
||||
... "duplicate listen options" warnings, and the
|
||||
... module must end up with exactly one listening
|
||||
... socket per (device, family) pair — not one per
|
||||
... (server block × device × family), which would
|
||||
... exhaust the fd table on a real host.
|
||||
${output} = Docker Exec ${SERVER} nginx -t 2>&1
|
||||
Should Not Contain ${output} conflicting server name
|
||||
Should Not Contain ${output} duplicate listen
|
||||
${listens} = Docker Exec ${SERVER} ss -tlnH
|
||||
${count} = Get Regexp Matches ${listens} :8080\\s
|
||||
Length Should Be ${count} 4
|
||||
... Expected 4 listening sockets on port 8080 (v4+v6 × eth1+eth2); got ${count}
|
||||
|
||||
Prometheus scrape
|
||||
[Documentation] Scrape returns HELP/TYPE preamble.
|
||||
${output} = Scrape Prometheus
|
||||
|
||||
@@ -26,6 +26,7 @@ topology:
|
||||
binds:
|
||||
- ../../../build:/opt/build:ro
|
||||
- ./server/nginx.conf:/opt/config/nginx.conf:ro
|
||||
- ./server/ipng-listens.inc:/opt/config/ipng-listens.inc:ro
|
||||
- ./server/slow-backend.py:/opt/config/slow-backend.py:ro
|
||||
- ./server/start.sh:/start.sh:ro
|
||||
cmd: bash /start.sh
|
||||
|
||||
12
tests/01-module/lab/server/ipng-listens.inc
Normal file
12
tests/01-module/lab/server/ipng-listens.inc
Normal file
@@ -0,0 +1,12 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# Shared listen include — matches the deployment pattern from
|
||||
# docs/user-guide.md, where every server block pulls in the same set of
|
||||
# device-tagged wildcard listens. The 01-module suite includes this
|
||||
# file from multiple server blocks to exercise the wrapper's dedup
|
||||
# logic: a naive implementation would either error with "duplicate
|
||||
# listen options" or create N * (devices × families) sockets.
|
||||
|
||||
listen 8080 device=eth1 ipng_source_tag=tag1;
|
||||
listen [::]:8080 device=eth1 ipng_source_tag=tag1;
|
||||
listen 8080 device=eth2 ipng_source_tag=tag2-v4;
|
||||
listen [::]:8080 device=eth2 ipng_source_tag=tag2-v6;
|
||||
@@ -41,30 +41,45 @@ http {
|
||||
'$ipng_source_tag\t$server_addr\t$scheme';
|
||||
ipng_stats_logtail ipng_stats_logtail udp://127.0.0.1:9514 buffer=4k flush=500ms if=$logtail_enabled;
|
||||
|
||||
# Three server blocks that all pull in the same listen include —
|
||||
# mirrors the real-world pattern where every site-*.conf has the
|
||||
# same `include listens.conf;`. The wrapper must:
|
||||
# * invoke nginx's listen handler exactly once per (server, addr)
|
||||
# pair, so each server block gets its own cscf attached but no
|
||||
# server block triggers nginx's "duplicate listen options"
|
||||
# check;
|
||||
# * dedup bindings globally on (sockaddr, device), so init_module
|
||||
# creates exactly four sockets here (two families × two
|
||||
# devices) rather than 3 × 4 = 12.
|
||||
# The default server owns the locations used by the traffic tests;
|
||||
# the two extras exist only to exercise the shared-include pattern.
|
||||
server {
|
||||
# Per-device wildcard listens. All four share port 8080; the
|
||||
# kernel's SO_BINDTODEVICE filtering routes each incoming packet
|
||||
# to the socket pinned to the interface it arrived on.
|
||||
listen 8080 device=eth1 ipng_source_tag=tag1;
|
||||
listen [::]:8080 device=eth1 ipng_source_tag=tag1;
|
||||
listen 8080 device=eth2 ipng_source_tag=tag2-v4;
|
||||
listen [::]:8080 device=eth2 ipng_source_tag=tag2-v6;
|
||||
|
||||
include /opt/config/ipng-listens.inc;
|
||||
server_name _;
|
||||
|
||||
location / {
|
||||
return 200 "ok $server_addr\n";
|
||||
}
|
||||
|
||||
location /notfound {
|
||||
return 404 "nope\n";
|
||||
}
|
||||
|
||||
location /slow {
|
||||
proxy_pass http://127.0.0.1:29080/;
|
||||
}
|
||||
}
|
||||
|
||||
server {
|
||||
include /opt/config/ipng-listens.inc;
|
||||
server_name extra-a.test;
|
||||
location / { return 200 "a\n"; }
|
||||
}
|
||||
|
||||
server {
|
||||
include /opt/config/ipng-listens.inc;
|
||||
server_name extra-b.test;
|
||||
location / { return 200 "b\n"; }
|
||||
}
|
||||
|
||||
server {
|
||||
# Direct (mgmt) traffic: no device binding on the listen,
|
||||
# `ipng_stats_default_source direct;` therefore tags it "direct".
|
||||
|
||||
Reference in New Issue
Block a user