Harden scrape rendering and add AddressSanitizer test suite
Move all heap allocation out of the slab-mutex critical section in render_prom/render_json: snapshot cardinality under a brief lock, allocate aggs/snaps/string tables outside the lock, then re-acquire only to deep-copy strings and walk the LRU into the pre-allocated buffers. A worker crash during output buffer allocation can no longer leave the shared-memory zone locked, and a corrupt cardinality count is caught by a 10k sanity cap rather than causing a runaway ngx_pcalloc. Add build-asan and tests/02-asan/: a full sanitizer-instrumented nginx + module built via apt-source, and a 2-node containerlab Robot suite that drives reload storms, concurrent scrape-during-reload, and intern-table growth, failing if AddressSanitizer or UBSan reports anything on stderr. The two Robot suites now check for their required build artifacts up front so `make robot-test` no longer rebuilds them on every invocation. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
84
Makefile
84
Makefile
@@ -27,7 +27,7 @@ VERSION := 0.2.0
|
||||
|
||||
NGINX_SRC ?=
|
||||
|
||||
.PHONY: help build pkg-deb robot-test install-deps clean fetch-nginx-src version-header
|
||||
.PHONY: help build build-asan pkg-deb robot-test install-deps clean fetch-nginx-src version-header
|
||||
|
||||
TEST ?= tests/
|
||||
|
||||
@@ -35,8 +35,10 @@ help:
|
||||
@echo "nginx-ipng-stats-plugin — make targets"
|
||||
@echo ""
|
||||
@echo " make build Build $(MODULE_NAME).so out-of-tree."
|
||||
@echo " make build-asan Build a full nginx + module with AddressSanitizer"
|
||||
@echo " into build/nginx-asan/ for local crash-hunting."
|
||||
@echo " make pkg-deb Build a Debian package via dpkg-buildpackage."
|
||||
@echo " make robot-test Build .deb, then run Robot Framework e2e tests."
|
||||
@echo " make robot-test Run Robot Framework e2e tests (all suites)."
|
||||
@echo " make install-deps Install build and test dependencies (apt)."
|
||||
@echo " make clean Remove build artifacts."
|
||||
@echo ""
|
||||
@@ -114,6 +116,80 @@ fetch-nginx-src:
|
||||
mv "$$NGX_DIR" $(BUILD_DIR)/nginx-src; \
|
||||
rm -rf $(BUILD_DIR)/apt-src
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# build-asan: full nginx + module built with AddressSanitizer. ASan
|
||||
# needs to be present in the main binary to instrument dynamic modules;
|
||||
# running a stock nginx with LD_PRELOAD=libasan.so does NOT work with
|
||||
# a statically-linked module's allocations. So we build nginx itself
|
||||
# here, into an isolated prefix under build/nginx-asan/, which you can
|
||||
# run by hand against any config. Normal `make build` is unaffected.
|
||||
#
|
||||
# Typical workflow for the reload-crash hunt:
|
||||
# make build-asan
|
||||
# cp your-test.conf build/nginx-asan/conf/nginx.conf
|
||||
# ASAN_OPTIONS=detect_odr_violation=0:abort_on_error=1:halt_on_error=1:detect_leaks=0 \
|
||||
# build/nginx-asan/sbin/nginx -p build/nginx-asan
|
||||
# # in another shell:
|
||||
# build/nginx-asan/sbin/nginx -p build/nginx-asan -s reload
|
||||
# ASan writes any findings to stderr of the master before aborting.
|
||||
#
|
||||
# detect_odr_violation=0 suppresses false positives for symbols nginx
|
||||
# intentionally duplicates between its main binary and dynamic modules
|
||||
# (e.g. ngx_module_names in objs/ngx_http_ipng_stats_module_modules.c).
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
# -fno-sanitize=nonnull-attribute suppresses a UBSan finding in nginx's
|
||||
# ngx_cpymem (src/core/ngx_string.c:84): the macro expands to memcpy(...)
|
||||
# which glibc declares _Nonnull, but nginx legitimately calls it with
|
||||
# (dst, NULL, 0) in many places. Filtering just that check keeps real
|
||||
# undefined behaviour visible.
|
||||
ASAN_CFLAGS := -fsanitize=address -fsanitize=undefined -fno-sanitize=nonnull-attribute -fno-omit-frame-pointer -fno-common -g -O1
|
||||
ASAN_LDFLAGS := -fsanitize=address -fsanitize=undefined
|
||||
|
||||
# The ASan build rebuilds nginx from source rather than only the module,
|
||||
# because ASan must instrument the main binary to catch heap issues in
|
||||
# modules it loads. The `nginx-dev` shim source tree under /usr/share
|
||||
# only carries headers, so we always fall through to `apt source nginx`
|
||||
# here and keep the full source tree isolated in build/nginx-asan-src/.
|
||||
build-asan: version-header
|
||||
@set -e; \
|
||||
NGX_SRC="$(BUILD_DIR)/nginx-asan-src"; \
|
||||
if [ ! -d "$$NGX_SRC" ] || [ ! -f "$$NGX_SRC/src/core/nginx.c" ]; then \
|
||||
echo "Fetching full nginx source via apt-source for ASan build"; \
|
||||
mkdir -p $(BUILD_DIR)/apt-src-asan; \
|
||||
rm -rf $$NGX_SRC; \
|
||||
cd $(BUILD_DIR)/apt-src-asan && apt source nginx; \
|
||||
NGX_DIR=$$(find $(BUILD_DIR)/apt-src-asan -maxdepth 1 -type d -name 'nginx-*' | head -n1); \
|
||||
if [ -z "$$NGX_DIR" ]; then \
|
||||
echo "error: could not find unpacked nginx source tree" >&2; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
mv "$$NGX_DIR" $$NGX_SRC; \
|
||||
rm -rf $(BUILD_DIR)/apt-src-asan; \
|
||||
fi; \
|
||||
PREFIX="$(BUILD_DIR)/nginx-asan"; \
|
||||
echo "Configuring ASan nginx in $$NGX_SRC (prefix=$$PREFIX)"; \
|
||||
cd "$$NGX_SRC" && ./configure \
|
||||
--prefix="$$PREFIX" \
|
||||
--with-cc-opt="$(ASAN_CFLAGS)" \
|
||||
--with-ld-opt="$(ASAN_LDFLAGS)" \
|
||||
--with-compat \
|
||||
--with-debug \
|
||||
--add-dynamic-module=$(MODULE_DIR); \
|
||||
echo "Building ASan nginx + module (this takes a minute)"; \
|
||||
$(MAKE) -C "$$NGX_SRC" -f objs/Makefile build; \
|
||||
$(MAKE) -C "$$NGX_SRC" -f objs/Makefile install; \
|
||||
install -d $$PREFIX/modules; \
|
||||
install -m 0644 "$$NGX_SRC/objs/$(MODULE_NAME).so" $$PREFIX/modules/; \
|
||||
echo ""; \
|
||||
echo "ASan build ready:"; \
|
||||
echo " nginx: $$PREFIX/sbin/nginx"; \
|
||||
echo " module: $$PREFIX/modules/$(MODULE_NAME).so"; \
|
||||
echo ""; \
|
||||
echo "Run with:"; \
|
||||
echo " ASAN_OPTIONS=detect_odr_violation=0:abort_on_error=1:halt_on_error=1:detect_leaks=0 \\"; \
|
||||
echo " $$PREFIX/sbin/nginx -p $$PREFIX"
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# pkg-deb: build a .deb
|
||||
# ----------------------------------------------------------------------
|
||||
@@ -146,10 +222,6 @@ tests/.venv: tests/requirements.txt
|
||||
tests/.venv/bin/pip install -q -r tests/requirements.txt
|
||||
|
||||
robot-test: tests/.venv
|
||||
@if [ ! -f $(BUILD_DIR)/libnginx-mod-http-ipng-stats_*.deb ]; then \
|
||||
echo "error: no .deb found in $(BUILD_DIR)/. Run 'make pkg-deb' first." >&2; \
|
||||
exit 1; \
|
||||
fi
|
||||
tests/rf-run.sh docker $(TEST)
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
@@ -2212,6 +2212,33 @@ ngx_http_ipng_stats_append(ngx_chain_t ***last, ngx_chain_t *cl)
|
||||
}
|
||||
|
||||
|
||||
/* Per-node snapshot used by the Prometheus and JSON renderers. The
|
||||
* renderers drain the shared-zone LRU into an array of these under
|
||||
* the slab mutex, and then release the mutex before doing any output
|
||||
* buffer allocation. That keeps ngx_pcalloc / malloc off the locked
|
||||
* path — a worker crash during rendering can never leave the slab
|
||||
* mutex held (NFR-4.3), and slab-lock contention no longer blocks on
|
||||
* glibc's allocator. */
|
||||
typedef struct {
|
||||
ngx_uint_t source_id;
|
||||
ngx_uint_t vip_id;
|
||||
ngx_uint_t class;
|
||||
uint64_t requests;
|
||||
uint64_t bytes_in;
|
||||
uint64_t bytes_out;
|
||||
uint64_t duration_sum_ms;
|
||||
uint64_t upstream_sum_ms;
|
||||
} ngx_http_ipng_stats_snap_t;
|
||||
|
||||
|
||||
/* Sanity cap on shared-zone cardinality, above which the renderer
|
||||
* refuses to snapshot rather than try to allocate a potentially
|
||||
* runaway buffer. This is a belt-and-suspenders guard against a
|
||||
* corrupt `sh->sources.nelts` or `sh->vips.nelts` — see design.md
|
||||
* on reload hardening. */
|
||||
#define NGX_HTTP_IPNG_STATS_MAX_INTERN 10000
|
||||
|
||||
|
||||
/* Find or create an aggregation entry for (source_id, vip_id). Linear
|
||||
* scan — the table is small in practice (< 100 entries). */
|
||||
static ngx_http_ipng_stats_agg_t *
|
||||
@@ -2232,91 +2259,45 @@ ngx_http_ipng_stats_agg_get(ngx_http_ipng_stats_agg_t *aggs, ngx_uint_t *naggs,
|
||||
}
|
||||
|
||||
|
||||
/* Walk matching nodes, emit per-node counters via `emit_counters`,
|
||||
* accumulate histograms into `aggs`. Caller holds slab mutex.
|
||||
* `ctx` is opaque — passed through to the emit callback. Returns number
|
||||
* of distinct (source, vip) pairs observed, or -1 on error. */
|
||||
typedef ngx_int_t (*ngx_http_ipng_stats_counters_pt)(ngx_http_request_t *r,
|
||||
ngx_http_ipng_stats_shctx_t *sh, ngx_http_ipng_stats_node_t *n,
|
||||
ngx_chain_t ***last, ngx_uint_t *emitted);
|
||||
|
||||
/* Deep-copy the sources and vips interning tables into r->pool so the
|
||||
* renderers can dereference strings without holding the slab mutex.
|
||||
* Caller holds the slab mutex. Writes the number of valid entries
|
||||
* (possibly smaller than the allocated length) via *n_src_out / *n_vip_out. */
|
||||
static ngx_int_t
|
||||
ngx_http_ipng_stats_walk_aggregate(ngx_http_request_t *r,
|
||||
ngx_http_ipng_stats_main_conf_t *imcf,
|
||||
ngx_str_t *filter_src, ngx_str_t *filter_vip,
|
||||
ngx_http_ipng_stats_counters_pt emit_counters,
|
||||
ngx_chain_t ***last, ngx_uint_t *emitted,
|
||||
ngx_http_ipng_stats_agg_t *aggs, ngx_uint_t naggs_alloc,
|
||||
ngx_uint_t *naggs_out)
|
||||
ngx_http_ipng_stats_snapshot_strings(ngx_http_request_t *r,
|
||||
ngx_http_ipng_stats_shctx_t *sh,
|
||||
ngx_str_t *src_tbl, ngx_uint_t n_src_alloc, ngx_uint_t *n_src_out,
|
||||
ngx_str_t *vip_tbl, ngx_uint_t n_vip_alloc, ngx_uint_t *n_vip_out)
|
||||
{
|
||||
ngx_http_ipng_stats_shctx_t *sh = imcf->shm_zone->data;
|
||||
ngx_queue_t *q;
|
||||
ngx_http_ipng_stats_node_t *n;
|
||||
ngx_str_t *src_entry, *vip_entry;
|
||||
ngx_atomic_uint_t *lanes, *blanes;
|
||||
ngx_http_ipng_stats_agg_t *a;
|
||||
ngx_uint_t i;
|
||||
ngx_uint_t nb = imcf->nbuckets;
|
||||
ngx_uint_t nbb = imcf->nbytebuckets;
|
||||
ngx_uint_t naggs = 0;
|
||||
ngx_int_t rc;
|
||||
ngx_uint_t i, n;
|
||||
u_char *d;
|
||||
|
||||
for (q = ngx_queue_head(&sh->lru);
|
||||
q != ngx_queue_sentinel(&sh->lru);
|
||||
q = ngx_queue_next(q))
|
||||
{
|
||||
n = ngx_queue_data(q, ngx_http_ipng_stats_node_t, lru);
|
||||
if (n->source_id >= sh->sources.nelts
|
||||
|| n->vip_id >= sh->vips.nelts)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
src_entry = &sh->sources.entries[n->source_id];
|
||||
vip_entry = &sh->vips.entries[n->vip_id];
|
||||
|
||||
if (filter_src->len > 0
|
||||
&& (src_entry->len != filter_src->len
|
||||
|| ngx_memcmp(src_entry->data, filter_src->data,
|
||||
filter_src->len) != 0))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if (filter_vip->len > 0
|
||||
&& (vip_entry->len != filter_vip->len
|
||||
|| ngx_memcmp(vip_entry->data, filter_vip->data,
|
||||
filter_vip->len) != 0))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
rc = emit_counters(r, sh, n, last, emitted);
|
||||
if (rc != NGX_OK) return NGX_ERROR;
|
||||
|
||||
a = ngx_http_ipng_stats_agg_get(aggs, &naggs, naggs_alloc,
|
||||
n->source_id, n->vip_id);
|
||||
if (a == NULL) continue;
|
||||
|
||||
a->duration_sum_ms += n->duration_sum_ms;
|
||||
a->upstream_sum_ms += n->upstream_sum_ms;
|
||||
a->bytes_in_sum += n->bytes_in;
|
||||
a->bytes_out_sum += n->bytes_out;
|
||||
a->req_total += n->requests;
|
||||
|
||||
lanes = (ngx_atomic_uint_t *) (n + 1);
|
||||
blanes = lanes + 2 * (nb + 1);
|
||||
|
||||
for (i = 0; i <= nb; i++) {
|
||||
a->dhist[i] += lanes[i];
|
||||
a->uhist[i] += lanes[nb + 1 + i];
|
||||
a->up_total += lanes[nb + 1 + i];
|
||||
}
|
||||
for (i = 0; i <= nbb; i++) {
|
||||
a->bin_hist[i] += blanes[i];
|
||||
a->bout_hist[i] += blanes[nbb + 1 + i];
|
||||
n = sh->sources.nelts < n_src_alloc ? sh->sources.nelts : n_src_alloc;
|
||||
for (i = 0; i < n; i++) {
|
||||
if (sh->sources.entries[i].len > 0) {
|
||||
d = ngx_pnalloc(r->pool, sh->sources.entries[i].len);
|
||||
if (d == NULL) return NGX_ERROR;
|
||||
ngx_memcpy(d, sh->sources.entries[i].data,
|
||||
sh->sources.entries[i].len);
|
||||
src_tbl[i].data = d;
|
||||
src_tbl[i].len = sh->sources.entries[i].len;
|
||||
}
|
||||
}
|
||||
*n_src_out = n;
|
||||
|
||||
n = sh->vips.nelts < n_vip_alloc ? sh->vips.nelts : n_vip_alloc;
|
||||
for (i = 0; i < n; i++) {
|
||||
if (sh->vips.entries[i].len > 0) {
|
||||
d = ngx_pnalloc(r->pool, sh->vips.entries[i].len);
|
||||
if (d == NULL) return NGX_ERROR;
|
||||
ngx_memcpy(d, sh->vips.entries[i].data,
|
||||
sh->vips.entries[i].len);
|
||||
vip_tbl[i].data = d;
|
||||
vip_tbl[i].len = sh->vips.entries[i].len;
|
||||
}
|
||||
}
|
||||
*n_vip_out = n;
|
||||
|
||||
*naggs_out = naggs;
|
||||
return NGX_OK;
|
||||
}
|
||||
|
||||
@@ -2352,32 +2333,105 @@ ngx_http_ipng_stats_agg_alloc(ngx_http_request_t *r,
|
||||
}
|
||||
|
||||
|
||||
/* -- Prometheus ---------------------------------------------------- */
|
||||
|
||||
static ngx_int_t
|
||||
ngx_http_ipng_stats_prom_counters(ngx_http_request_t *r,
|
||||
ngx_http_ipng_stats_shctx_t *sh, ngx_http_ipng_stats_node_t *n,
|
||||
ngx_chain_t ***last, ngx_uint_t *emitted)
|
||||
/* Walk the LRU under the slab mutex, draining matching nodes into the
|
||||
* caller-supplied `snaps` array and aggregating histogram lanes into
|
||||
* `aggs`. All output buffers are pre-allocated by the caller — this
|
||||
* function does not touch r->pool, so a crash here cannot leave the
|
||||
* slab mutex held via a glibc abort.
|
||||
*
|
||||
* `src_tbl` / `vip_tbl` hold the deep-copied interning strings and are
|
||||
* used only for filter comparisons; the renderer downstream indexes
|
||||
* them again. Nodes whose ids fall outside the snapshotted range are
|
||||
* skipped (a belt-and-suspenders guard against a stale id after a
|
||||
* concurrent reload). */
|
||||
static void
|
||||
ngx_http_ipng_stats_snapshot_nodes(ngx_http_ipng_stats_shctx_t *sh,
|
||||
ngx_http_ipng_stats_main_conf_t *imcf,
|
||||
ngx_str_t *filter_src, ngx_str_t *filter_vip,
|
||||
ngx_str_t *src_tbl, ngx_uint_t n_src,
|
||||
ngx_str_t *vip_tbl, ngx_uint_t n_vip,
|
||||
ngx_http_ipng_stats_snap_t *snaps, ngx_uint_t nsnaps_alloc,
|
||||
ngx_uint_t *nsnaps_out,
|
||||
ngx_http_ipng_stats_agg_t *aggs, ngx_uint_t naggs_alloc,
|
||||
ngx_uint_t *naggs_out)
|
||||
{
|
||||
ngx_str_t *src = &sh->sources.entries[n->source_id];
|
||||
ngx_str_t *vip = &sh->vips.entries[n->vip_id];
|
||||
const char *cls = ngx_http_ipng_stats_class_label(n->class);
|
||||
ngx_chain_t *cl = ngx_http_ipng_stats_chain_buf(r, 1024);
|
||||
if (cl == NULL) return NGX_ERROR;
|
||||
cl->buf->last = ngx_sprintf(cl->buf->last,
|
||||
"nginx_ipng_requests_total{source_tag=\"%V\",vip=\"%V\",code=\"%s\"} %uA\n"
|
||||
"nginx_ipng_bytes_in_total{source_tag=\"%V\",vip=\"%V\",code=\"%s\"} %uA\n"
|
||||
"nginx_ipng_bytes_out_total{source_tag=\"%V\",vip=\"%V\",code=\"%s\"} %uA\n"
|
||||
"nginx_ipng_latency_total{source_tag=\"%V\",vip=\"%V\",code=\"%s\"} %.3f\n",
|
||||
src, vip, cls, n->requests,
|
||||
src, vip, cls, n->bytes_in,
|
||||
src, vip, cls, n->bytes_out,
|
||||
src, vip, cls, (double) n->duration_sum_ms / 1000.0);
|
||||
(*emitted)++;
|
||||
return ngx_http_ipng_stats_append(last, cl);
|
||||
ngx_queue_t *q;
|
||||
ngx_http_ipng_stats_node_t *n;
|
||||
ngx_str_t *src_entry, *vip_entry;
|
||||
ngx_atomic_uint_t *lanes, *blanes;
|
||||
ngx_http_ipng_stats_agg_t *a;
|
||||
ngx_uint_t i;
|
||||
ngx_uint_t nb = imcf->nbuckets;
|
||||
ngx_uint_t nbb = imcf->nbytebuckets;
|
||||
ngx_uint_t nsnaps = 0, naggs = 0;
|
||||
|
||||
for (q = ngx_queue_head(&sh->lru);
|
||||
q != ngx_queue_sentinel(&sh->lru);
|
||||
q = ngx_queue_next(q))
|
||||
{
|
||||
n = ngx_queue_data(q, ngx_http_ipng_stats_node_t, lru);
|
||||
if (n->source_id >= n_src || n->vip_id >= n_vip) continue;
|
||||
|
||||
src_entry = &src_tbl[n->source_id];
|
||||
vip_entry = &vip_tbl[n->vip_id];
|
||||
|
||||
if (filter_src->len > 0
|
||||
&& (src_entry->len != filter_src->len
|
||||
|| ngx_memcmp(src_entry->data, filter_src->data,
|
||||
filter_src->len) != 0))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if (filter_vip->len > 0
|
||||
&& (vip_entry->len != filter_vip->len
|
||||
|| ngx_memcmp(vip_entry->data, filter_vip->data,
|
||||
filter_vip->len) != 0))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (nsnaps < nsnaps_alloc) {
|
||||
snaps[nsnaps].source_id = n->source_id;
|
||||
snaps[nsnaps].vip_id = n->vip_id;
|
||||
snaps[nsnaps].class = n->class;
|
||||
snaps[nsnaps].requests = n->requests;
|
||||
snaps[nsnaps].bytes_in = n->bytes_in;
|
||||
snaps[nsnaps].bytes_out = n->bytes_out;
|
||||
snaps[nsnaps].duration_sum_ms = n->duration_sum_ms;
|
||||
snaps[nsnaps].upstream_sum_ms = n->upstream_sum_ms;
|
||||
nsnaps++;
|
||||
}
|
||||
|
||||
a = ngx_http_ipng_stats_agg_get(aggs, &naggs, naggs_alloc,
|
||||
n->source_id, n->vip_id);
|
||||
if (a == NULL) continue;
|
||||
a->duration_sum_ms += n->duration_sum_ms;
|
||||
a->upstream_sum_ms += n->upstream_sum_ms;
|
||||
a->bytes_in_sum += n->bytes_in;
|
||||
a->bytes_out_sum += n->bytes_out;
|
||||
a->req_total += n->requests;
|
||||
|
||||
lanes = (ngx_atomic_uint_t *) (n + 1);
|
||||
blanes = lanes + 2 * (nb + 1);
|
||||
for (i = 0; i <= nb; i++) {
|
||||
a->dhist[i] += lanes[i];
|
||||
a->uhist[i] += lanes[nb + 1 + i];
|
||||
a->up_total += lanes[nb + 1 + i];
|
||||
}
|
||||
for (i = 0; i <= nbb; i++) {
|
||||
a->bin_hist[i] += blanes[i];
|
||||
a->bout_hist[i] += blanes[nbb + 1 + i];
|
||||
}
|
||||
}
|
||||
|
||||
*nsnaps_out = nsnaps;
|
||||
*naggs_out = naggs;
|
||||
}
|
||||
|
||||
|
||||
/* -- Prometheus ---------------------------------------------------- */
|
||||
|
||||
|
||||
static u_char *
|
||||
ngx_http_ipng_stats_render_hist(u_char *p, const char *metric,
|
||||
ngx_str_t *src, ngx_str_t *vip, ngx_uint_t *bounds, ngx_uint_t nb,
|
||||
@@ -2422,12 +2476,16 @@ ngx_http_ipng_stats_render_prom(ngx_http_request_t *r,
|
||||
ngx_chain_t **last = &out;
|
||||
ngx_str_t ctype = ngx_string("text/plain; version=0.0.4");
|
||||
ngx_http_ipng_stats_agg_t *aggs;
|
||||
ngx_http_ipng_stats_snap_t *snaps;
|
||||
ngx_str_t *src_tbl = NULL, *vip_tbl = NULL;
|
||||
ngx_uint_t n_src_alloc, n_vip_alloc;
|
||||
ngx_uint_t n_src = 0, n_vip = 0;
|
||||
ngx_uint_t naggs = 0, naggs_alloc;
|
||||
ngx_uint_t emitted = 0, i;
|
||||
ngx_uint_t nsnaps = 0, nsnaps_alloc;
|
||||
ngx_uint_t i;
|
||||
ngx_uint_t nb = imcf->nbuckets;
|
||||
ngx_uint_t nbb = imcf->nbytebuckets;
|
||||
size_t hist_sz;
|
||||
ngx_int_t rc;
|
||||
|
||||
slab = (ngx_slab_pool_t *) imcf->shm_zone->shm.addr;
|
||||
|
||||
@@ -2449,29 +2507,92 @@ ngx_http_ipng_stats_render_prom(ngx_http_request_t *r,
|
||||
"# TYPE nginx_ipng_upstream_response_seconds histogram\n"
|
||||
"# HELP nginx_ipng_bytes_in Request size histogram in bytes.\n"
|
||||
"# TYPE nginx_ipng_bytes_in histogram\n"
|
||||
"# HELP nginx_ipng_bytes_out Response size histogram in bytes.\n"
|
||||
"# HELP nginx_ipng_bytes_out Request size histogram in bytes.\n"
|
||||
"# TYPE nginx_ipng_bytes_out histogram\n",
|
||||
NGX_HTTP_IPNG_STATS_VERSION, NGX_HTTP_IPNG_STATS_SCHEMA_VERSION);
|
||||
if (ngx_http_ipng_stats_append(&last, cl) != NGX_OK) {
|
||||
return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
|
||||
/* Read cardinality with a brief lock. We release before allocating
|
||||
* the snapshot buffers so glibc's allocator is never entered with
|
||||
* the slab mutex held — a worker crash during allocation cannot
|
||||
* leave the shared-memory zone locked. */
|
||||
ngx_shmtx_lock(&slab->mutex);
|
||||
n_src_alloc = sh->sources.nelts;
|
||||
n_vip_alloc = sh->vips.nelts;
|
||||
ngx_shmtx_unlock(&slab->mutex);
|
||||
|
||||
if (n_src_alloc > NGX_HTTP_IPNG_STATS_MAX_INTERN
|
||||
|| n_vip_alloc > NGX_HTTP_IPNG_STATS_MAX_INTERN)
|
||||
{
|
||||
ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
|
||||
"ipng_stats: render: refusing to render, cardinality "
|
||||
"out of range (sources=%ui, vips=%ui)",
|
||||
n_src_alloc, n_vip_alloc);
|
||||
return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
|
||||
naggs_alloc = n_src_alloc * n_vip_alloc;
|
||||
nsnaps_alloc = naggs_alloc * NGX_HTTP_IPNG_STATS_NCLASSES;
|
||||
if (naggs_alloc == 0) naggs_alloc = 1;
|
||||
if (nsnaps_alloc == 0) nsnaps_alloc = 1;
|
||||
|
||||
if (ngx_http_ipng_stats_agg_alloc(r, imcf, naggs_alloc,
|
||||
&aggs, &naggs_alloc) != NGX_OK)
|
||||
{
|
||||
return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
snaps = ngx_pcalloc(r->pool, nsnaps_alloc * sizeof(*snaps));
|
||||
if (n_src_alloc > 0) {
|
||||
src_tbl = ngx_pcalloc(r->pool, n_src_alloc * sizeof(ngx_str_t));
|
||||
if (src_tbl == NULL) return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
if (n_vip_alloc > 0) {
|
||||
vip_tbl = ngx_pcalloc(r->pool, n_vip_alloc * sizeof(ngx_str_t));
|
||||
if (vip_tbl == NULL) return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
if (snaps == NULL) return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
|
||||
ngx_shmtx_lock(&slab->mutex);
|
||||
|
||||
if (ngx_http_ipng_stats_agg_alloc(r, imcf,
|
||||
sh->sources.nelts * sh->vips.nelts, &aggs, &naggs_alloc) != NGX_OK)
|
||||
if (ngx_http_ipng_stats_snapshot_strings(r, sh,
|
||||
src_tbl, n_src_alloc, &n_src,
|
||||
vip_tbl, n_vip_alloc, &n_vip) != NGX_OK)
|
||||
{
|
||||
ngx_shmtx_unlock(&slab->mutex);
|
||||
return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
|
||||
rc = ngx_http_ipng_stats_walk_aggregate(r, imcf, filter_source, filter_vip,
|
||||
ngx_http_ipng_stats_prom_counters, &last, &emitted,
|
||||
ngx_http_ipng_stats_snapshot_nodes(sh, imcf,
|
||||
filter_source, filter_vip,
|
||||
src_tbl, n_src, vip_tbl, n_vip,
|
||||
snaps, nsnaps_alloc, &nsnaps,
|
||||
aggs, naggs_alloc, &naggs);
|
||||
if (rc != NGX_OK) {
|
||||
|
||||
ngx_shmtx_unlock(&slab->mutex);
|
||||
|
||||
/* Per-node counters. */
|
||||
for (i = 0; i < nsnaps; i++) {
|
||||
ngx_http_ipng_stats_snap_t *s = &snaps[i];
|
||||
ngx_str_t *src = &src_tbl[s->source_id];
|
||||
ngx_str_t *vip = &vip_tbl[s->vip_id];
|
||||
const char *cls = ngx_http_ipng_stats_class_label(s->class);
|
||||
cl = ngx_http_ipng_stats_chain_buf(r, 1024);
|
||||
if (cl == NULL) return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
cl->buf->last = ngx_sprintf(cl->buf->last,
|
||||
"nginx_ipng_requests_total{source_tag=\"%V\",vip=\"%V\",code=\"%s\"} %uL\n"
|
||||
"nginx_ipng_bytes_in_total{source_tag=\"%V\",vip=\"%V\",code=\"%s\"} %uL\n"
|
||||
"nginx_ipng_bytes_out_total{source_tag=\"%V\",vip=\"%V\",code=\"%s\"} %uL\n"
|
||||
"nginx_ipng_latency_total{source_tag=\"%V\",vip=\"%V\",code=\"%s\"} %.3f\n",
|
||||
src, vip, cls, s->requests,
|
||||
src, vip, cls, s->bytes_in,
|
||||
src, vip, cls, s->bytes_out,
|
||||
src, vip, cls, (double) s->duration_sum_ms / 1000.0);
|
||||
if (ngx_http_ipng_stats_append(&last, cl) != NGX_OK) {
|
||||
return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/* One chain link per (source, vip) for the four aggregated histograms.
|
||||
* Size: per-bucket line ~96B, + sum/count/+Inf per metric ~96B each. */
|
||||
@@ -2479,15 +2600,12 @@ ngx_http_ipng_stats_render_prom(ngx_http_request_t *r,
|
||||
|
||||
for (i = 0; i < naggs; i++) {
|
||||
ngx_http_ipng_stats_agg_t *a = &aggs[i];
|
||||
ngx_str_t *src = &sh->sources.entries[a->source_id];
|
||||
ngx_str_t *vip = &sh->vips.entries[a->vip_id];
|
||||
ngx_str_t *src = &src_tbl[a->source_id];
|
||||
ngx_str_t *vip = &vip_tbl[a->vip_id];
|
||||
u_char *p;
|
||||
|
||||
cl = ngx_http_ipng_stats_chain_buf(r, hist_sz);
|
||||
if (cl == NULL) {
|
||||
ngx_shmtx_unlock(&slab->mutex);
|
||||
return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
if (cl == NULL) return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
p = cl->buf->last;
|
||||
p = ngx_http_ipng_stats_render_hist(p,
|
||||
"nginx_ipng_request_duration_seconds", src, vip,
|
||||
@@ -2507,33 +2625,16 @@ ngx_http_ipng_stats_render_prom(ngx_http_request_t *r,
|
||||
(double) a->bytes_out_sum, 0);
|
||||
cl->buf->last = p;
|
||||
if (ngx_http_ipng_stats_append(&last, cl) != NGX_OK) {
|
||||
ngx_shmtx_unlock(&slab->mutex);
|
||||
return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
ngx_shmtx_unlock(&slab->mutex);
|
||||
return ngx_http_ipng_stats_send(r, &ctype, out);
|
||||
}
|
||||
|
||||
|
||||
/* -- JSON ---------------------------------------------------------- */
|
||||
|
||||
/* Per-(source, vip, class) counter group. We render one JSON object per
|
||||
* aggregated (source, vip) record, with the class breakdown stored in
|
||||
* an interim table while the walk is in progress. */
|
||||
typedef struct {
|
||||
ngx_uint_t source_id;
|
||||
ngx_uint_t vip_id;
|
||||
ngx_uint_t class;
|
||||
uint64_t requests;
|
||||
uint64_t bytes_in;
|
||||
uint64_t bytes_out;
|
||||
uint64_t duration_sum_ms;
|
||||
uint64_t upstream_sum_ms;
|
||||
} ngx_http_ipng_stats_jnode_t;
|
||||
|
||||
|
||||
static ngx_int_t
|
||||
ngx_http_ipng_stats_render_json(ngx_http_request_t *r,
|
||||
ngx_http_ipng_stats_main_conf_t *imcf,
|
||||
@@ -2545,16 +2646,15 @@ ngx_http_ipng_stats_render_json(ngx_http_request_t *r,
|
||||
ngx_chain_t **last = &out;
|
||||
ngx_str_t ctype = ngx_string("application/json");
|
||||
ngx_http_ipng_stats_agg_t *aggs;
|
||||
ngx_http_ipng_stats_jnode_t *jnodes;
|
||||
ngx_uint_t njnodes = 0, njnodes_alloc;
|
||||
ngx_http_ipng_stats_snap_t *snaps;
|
||||
ngx_str_t *src_tbl = NULL, *vip_tbl = NULL;
|
||||
ngx_uint_t n_src_alloc, n_vip_alloc;
|
||||
ngx_uint_t n_src = 0, n_vip = 0;
|
||||
ngx_uint_t nsnaps = 0, nsnaps_alloc;
|
||||
ngx_uint_t naggs = 0, naggs_alloc;
|
||||
ngx_uint_t i, j, emitted = 0;
|
||||
ngx_uint_t nb = imcf->nbuckets;
|
||||
ngx_uint_t nbb = imcf->nbytebuckets;
|
||||
ngx_queue_t *q;
|
||||
ngx_http_ipng_stats_node_t *nd;
|
||||
ngx_str_t *src_entry, *vip_entry;
|
||||
ngx_atomic_uint_t *lanes, *blanes;
|
||||
ngx_http_ipng_stats_agg_t *a;
|
||||
size_t rec_sz;
|
||||
|
||||
@@ -2570,85 +2670,58 @@ ngx_http_ipng_stats_render_json(ngx_http_request_t *r,
|
||||
}
|
||||
|
||||
ngx_shmtx_lock(&slab->mutex);
|
||||
n_src_alloc = sh->sources.nelts;
|
||||
n_vip_alloc = sh->vips.nelts;
|
||||
ngx_shmtx_unlock(&slab->mutex);
|
||||
|
||||
if (n_src_alloc > NGX_HTTP_IPNG_STATS_MAX_INTERN
|
||||
|| n_vip_alloc > NGX_HTTP_IPNG_STATS_MAX_INTERN)
|
||||
{
|
||||
ngx_log_error(NGX_LOG_ERR, r->connection->log, 0,
|
||||
"ipng_stats: render: refusing to render, cardinality "
|
||||
"out of range (sources=%ui, vips=%ui)",
|
||||
n_src_alloc, n_vip_alloc);
|
||||
return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
|
||||
naggs_alloc = n_src_alloc * n_vip_alloc;
|
||||
nsnaps_alloc = naggs_alloc * NGX_HTTP_IPNG_STATS_NCLASSES;
|
||||
if (naggs_alloc == 0) naggs_alloc = 1;
|
||||
if (nsnaps_alloc == 0) nsnaps_alloc = 1;
|
||||
|
||||
naggs_alloc = sh->sources.nelts * sh->vips.nelts;
|
||||
if (ngx_http_ipng_stats_agg_alloc(r, imcf, naggs_alloc,
|
||||
&aggs, &naggs_alloc) != NGX_OK)
|
||||
{
|
||||
return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
snaps = ngx_pcalloc(r->pool, nsnaps_alloc * sizeof(*snaps));
|
||||
if (n_src_alloc > 0) {
|
||||
src_tbl = ngx_pcalloc(r->pool, n_src_alloc * sizeof(ngx_str_t));
|
||||
if (src_tbl == NULL) return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
if (n_vip_alloc > 0) {
|
||||
vip_tbl = ngx_pcalloc(r->pool, n_vip_alloc * sizeof(ngx_str_t));
|
||||
if (vip_tbl == NULL) return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
if (snaps == NULL) return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
|
||||
ngx_shmtx_lock(&slab->mutex);
|
||||
|
||||
if (ngx_http_ipng_stats_snapshot_strings(r, sh,
|
||||
src_tbl, n_src_alloc, &n_src,
|
||||
vip_tbl, n_vip_alloc, &n_vip) != NGX_OK)
|
||||
{
|
||||
ngx_shmtx_unlock(&slab->mutex);
|
||||
return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
|
||||
/* Upper bound on jnodes = naggs_alloc * NCLASSES. */
|
||||
njnodes_alloc = naggs_alloc * NGX_HTTP_IPNG_STATS_NCLASSES;
|
||||
if (njnodes_alloc == 0) njnodes_alloc = 1;
|
||||
jnodes = ngx_pcalloc(r->pool, njnodes_alloc * sizeof(*jnodes));
|
||||
if (jnodes == NULL) {
|
||||
ngx_http_ipng_stats_snapshot_nodes(sh, imcf,
|
||||
filter_source, filter_vip,
|
||||
src_tbl, n_src, vip_tbl, n_vip,
|
||||
snaps, nsnaps_alloc, &nsnaps,
|
||||
aggs, naggs_alloc, &naggs);
|
||||
|
||||
ngx_shmtx_unlock(&slab->mutex);
|
||||
return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
|
||||
for (q = ngx_queue_head(&sh->lru);
|
||||
q != ngx_queue_sentinel(&sh->lru);
|
||||
q = ngx_queue_next(q))
|
||||
{
|
||||
nd = ngx_queue_data(q, ngx_http_ipng_stats_node_t, lru);
|
||||
if (nd->source_id >= sh->sources.nelts
|
||||
|| nd->vip_id >= sh->vips.nelts)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
src_entry = &sh->sources.entries[nd->source_id];
|
||||
vip_entry = &sh->vips.entries[nd->vip_id];
|
||||
|
||||
if (filter_source->len > 0
|
||||
&& (src_entry->len != filter_source->len
|
||||
|| ngx_memcmp(src_entry->data, filter_source->data,
|
||||
filter_source->len) != 0))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if (filter_vip->len > 0
|
||||
&& (vip_entry->len != filter_vip->len
|
||||
|| ngx_memcmp(vip_entry->data, filter_vip->data,
|
||||
filter_vip->len) != 0))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (njnodes < njnodes_alloc) {
|
||||
jnodes[njnodes].source_id = nd->source_id;
|
||||
jnodes[njnodes].vip_id = nd->vip_id;
|
||||
jnodes[njnodes].class = nd->class;
|
||||
jnodes[njnodes].requests = nd->requests;
|
||||
jnodes[njnodes].bytes_in = nd->bytes_in;
|
||||
jnodes[njnodes].bytes_out = nd->bytes_out;
|
||||
jnodes[njnodes].duration_sum_ms = nd->duration_sum_ms;
|
||||
jnodes[njnodes].upstream_sum_ms = nd->upstream_sum_ms;
|
||||
njnodes++;
|
||||
}
|
||||
|
||||
a = ngx_http_ipng_stats_agg_get(aggs, &naggs, naggs_alloc,
|
||||
nd->source_id, nd->vip_id);
|
||||
if (a == NULL) continue;
|
||||
a->duration_sum_ms += nd->duration_sum_ms;
|
||||
a->upstream_sum_ms += nd->upstream_sum_ms;
|
||||
a->bytes_in_sum += nd->bytes_in;
|
||||
a->bytes_out_sum += nd->bytes_out;
|
||||
a->req_total += nd->requests;
|
||||
|
||||
lanes = (ngx_atomic_uint_t *) (nd + 1);
|
||||
blanes = lanes + 2 * (nb + 1);
|
||||
for (i = 0; i <= nb; i++) {
|
||||
a->dhist[i] += lanes[i];
|
||||
a->uhist[i] += lanes[nb + 1 + i];
|
||||
a->up_total += lanes[nb + 1 + i];
|
||||
}
|
||||
for (i = 0; i <= nbb; i++) {
|
||||
a->bin_hist[i] += blanes[i];
|
||||
a->bout_hist[i] += blanes[nbb + 1 + i];
|
||||
}
|
||||
}
|
||||
|
||||
/* One JSON record per aggregated (source, vip). Size upper-bound
|
||||
* accounts for: fixed overhead, up to NCLASSES class entries, 4
|
||||
@@ -2660,33 +2733,30 @@ ngx_http_ipng_stats_render_json(ngx_http_request_t *r,
|
||||
|
||||
for (i = 0; i < naggs; i++) {
|
||||
a = &aggs[i];
|
||||
ngx_str_t *src = &sh->sources.entries[a->source_id];
|
||||
ngx_str_t *vip = &sh->vips.entries[a->vip_id];
|
||||
ngx_str_t *src = &src_tbl[a->source_id];
|
||||
ngx_str_t *vip = &vip_tbl[a->vip_id];
|
||||
u_char *p;
|
||||
ngx_uint_t first_class = 1;
|
||||
|
||||
cl = ngx_http_ipng_stats_chain_buf(r, rec_sz);
|
||||
if (cl == NULL) {
|
||||
ngx_shmtx_unlock(&slab->mutex);
|
||||
return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
if (cl == NULL) return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
p = cl->buf->last;
|
||||
p = ngx_sprintf(p,
|
||||
"%s{\"source_tag\":\"%V\",\"vip\":\"%V\",\"classes\":{",
|
||||
(emitted == 0) ? "" : ",", src, vip);
|
||||
|
||||
for (j = 0; j < njnodes; j++) {
|
||||
if (jnodes[j].source_id != a->source_id
|
||||
|| jnodes[j].vip_id != a->vip_id) continue;
|
||||
for (j = 0; j < nsnaps; j++) {
|
||||
if (snaps[j].source_id != a->source_id
|
||||
|| snaps[j].vip_id != a->vip_id) continue;
|
||||
p = ngx_sprintf(p,
|
||||
"%s\"%s\":{\"requests\":%uL,\"bytes_in\":%uL,"
|
||||
"\"bytes_out\":%uL,\"latency_ms\":%uL,"
|
||||
"\"upstream_latency_ms\":%uL}",
|
||||
first_class ? "" : ",",
|
||||
ngx_http_ipng_stats_class_label(jnodes[j].class),
|
||||
jnodes[j].requests, jnodes[j].bytes_in,
|
||||
jnodes[j].bytes_out, jnodes[j].duration_sum_ms,
|
||||
jnodes[j].upstream_sum_ms);
|
||||
ngx_http_ipng_stats_class_label(snaps[j].class),
|
||||
snaps[j].requests, snaps[j].bytes_in,
|
||||
snaps[j].bytes_out, snaps[j].duration_sum_ms,
|
||||
snaps[j].upstream_sum_ms);
|
||||
first_class = 0;
|
||||
}
|
||||
p = ngx_sprintf(p, "},\"request_duration_ms\":{\"sum\":%uL,"
|
||||
@@ -2741,14 +2811,11 @@ ngx_http_ipng_stats_render_json(ngx_http_request_t *r,
|
||||
|
||||
cl->buf->last = p;
|
||||
if (ngx_http_ipng_stats_append(&last, cl) != NGX_OK) {
|
||||
ngx_shmtx_unlock(&slab->mutex);
|
||||
return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
}
|
||||
emitted++;
|
||||
}
|
||||
|
||||
ngx_shmtx_unlock(&slab->mutex);
|
||||
|
||||
cl = ngx_http_ipng_stats_chain_buf(r, 8);
|
||||
if (cl == NULL) return NGX_HTTP_INTERNAL_SERVER_ERROR;
|
||||
cl->buf->last = ngx_sprintf(cl->buf->last, "]}\n");
|
||||
|
||||
@@ -201,6 +201,7 @@ Request count accuracy
|
||||
# --- Lab lifecycle ---
|
||||
|
||||
Deploy Lab
|
||||
Require Deb Build
|
||||
Run ${CLAB_BIN} --runtime ${runtime} destroy -t ${CURDIR}/${lab-file} --cleanup 2>&1 || true
|
||||
${rc} ${output} = Run And Return Rc And Output
|
||||
... ${CLAB_BIN} --runtime ${runtime} deploy -t ${CURDIR}/${lab-file}
|
||||
@@ -210,6 +211,16 @@ Deploy Lab
|
||||
Wait Until Keyword Succeeds 60s 3s Client Can Reach Server ${CLIENT1} 10.0.1.1
|
||||
Wait Until Keyword Succeeds 60s 3s Client Can Reach Server ${CLIENT2} 10.0.2.1
|
||||
|
||||
Require Deb Build
|
||||
[Documentation] Fail fast with an actionable message if the user
|
||||
... forgot to run `make pkg-deb` before invoking this
|
||||
... suite. The server container dpkg-installs the
|
||||
... built .deb via its bind-mount of build/.
|
||||
${rc} ${output} = Run And Return Rc And Output
|
||||
... bash -c 'ls ${EXECDIR}/build/libnginx-mod-http-ipng-stats_*.deb 2>/dev/null'
|
||||
Run Keyword If ${rc} != 0
|
||||
... Fail Module .deb not found — run `make pkg-deb` first.
|
||||
|
||||
Server Is Ready
|
||||
${rc} ${output} = Run And Return Rc And Output
|
||||
... curl -sf ${SCRAPE_URL}
|
||||
|
||||
184
tests/02-asan/02-asan.robot
Normal file
184
tests/02-asan/02-asan.robot
Normal file
@@ -0,0 +1,184 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
*** Settings ***
|
||||
Documentation AddressSanitizer + UBSan stress suite for
|
||||
... ngx_http_ipng_stats_module. Deploys a 2-node containerlab
|
||||
... topology running an ASan-instrumented nginx (built by
|
||||
... `make build-asan`), exercises the code paths most likely
|
||||
... to surface memory errors — shared-zone init and reuse,
|
||||
... scrape rendering under the slab mutex, log-phase
|
||||
... interning, logtail UDP flush — and fails if any
|
||||
... AddressSanitizer or UBSan finding appears in the nginx
|
||||
... stderr during the run.
|
||||
...
|
||||
... This suite is deliberately not a superset of 01-module —
|
||||
... it's a landing zone for memory-correctness cases.
|
||||
... Functional coverage (attribution, filters, counters)
|
||||
... lives in 01-module.
|
||||
Library OperatingSystem
|
||||
Library String
|
||||
Suite Setup Deploy Lab
|
||||
Suite Teardown Cleanup Lab
|
||||
Test Teardown Assert No Sanitizer Findings
|
||||
|
||||
*** Variables ***
|
||||
${lab-name} ipng-stats-asan
|
||||
${lab-file} lab/ipng-stats-asan.clab.yml
|
||||
${runtime} docker
|
||||
${CLAB_BIN} sudo containerlab
|
||||
${SERVER} clab-${lab-name}-server
|
||||
${CLIENT} clab-${lab-name}-client
|
||||
${SCRAPE_URL} http://172.20.41.2:9113/stats
|
||||
${DATAPLANE_URL} http://10.0.1.1:8080
|
||||
${STRESS_RELOADS} 10
|
||||
${STRESS_REQ_PER_LOOP} 25
|
||||
|
||||
*** Test Cases ***
|
||||
|
||||
ASan nginx starts and serves a scrape
|
||||
[Documentation] The ASan-instrumented nginx boots with the module
|
||||
... loaded, and a bare scrape returns the expected
|
||||
... preamble. Touches init_zone, postconfig, and the
|
||||
... scrape renderer with an empty LRU.
|
||||
${output} = Scrape Prometheus
|
||||
Should Contain ${output} nginx-ipng-stats-plugin
|
||||
Should Contain ${output} nginx_ipng_requests_total
|
||||
|
||||
Scrape an empty JSON report
|
||||
[Documentation] JSON renderer path with zero records — catches
|
||||
... off-by-one errors in the bracket emission.
|
||||
${rc} ${output} = Run And Return Rc And Output
|
||||
... curl -sf -H 'Accept: application/json' ${SCRAPE_URL}
|
||||
Should Be Equal As Integers ${rc} 0
|
||||
Should Contain ${output} "schema":2
|
||||
Should Contain ${output} "records":[
|
||||
|
||||
Reload storm without traffic
|
||||
[Documentation] Back-to-back reloads with no traffic in between.
|
||||
... Exercises init_zone's zone-reuse branch and the
|
||||
... shctx magic check; the cardinality is zero so the
|
||||
... renderer's naggs_alloc == 0 path is also covered.
|
||||
FOR ${i} IN RANGE ${STRESS_RELOADS}
|
||||
Docker Exec ${SERVER} ngxasan -s reload
|
||||
Sleep 200ms
|
||||
Scrape Prometheus
|
||||
END
|
||||
|
||||
Reload storm with interleaved traffic
|
||||
[Documentation] Generate traffic, reload, scrape, repeat. This is
|
||||
... the scenario that surfaced the original crash: the
|
||||
... scrape path walks the shared-zone LRU while workers
|
||||
... are being cycled. Also grows the interning table
|
||||
... by using a handful of distinct paths.
|
||||
FOR ${i} IN RANGE ${STRESS_RELOADS}
|
||||
Generate Traffic ${STRESS_REQ_PER_LOOP}
|
||||
Docker Exec ${SERVER} ngxasan -s reload
|
||||
Sleep 200ms
|
||||
Scrape Prometheus
|
||||
END
|
||||
|
||||
Concurrent scrape during reload
|
||||
[Documentation] Scrape in a tight loop while issuing reloads from
|
||||
... a parallel shell. The renderer's snapshot step
|
||||
... deep-copies strings under the slab mutex; a
|
||||
... concurrent intern_shared grow during that window
|
||||
... would surface here as use-after-free. We run the
|
||||
... whole dance in one bash -c so Robot doesn't have
|
||||
... to babysit the background pid.
|
||||
Generate Traffic ${STRESS_REQ_PER_LOOP}
|
||||
${rc} ${output} = Run And Return Rc And Output
|
||||
... bash -c '( for i in $(seq 1 200); do curl -sf ${SCRAPE_URL} > /dev/null || true; done ) & scraper=$!; for i in 1 2 3 4 5; do docker exec ${SERVER} ngxasan -s reload; sleep 0.3; done; wait $scraper'
|
||||
Should Be Equal As Integers ${rc} 0
|
||||
|
||||
Large cardinality intern table growth
|
||||
[Documentation] Drive enough distinct request paths that the
|
||||
... per-VIP vip/source interning array grows past its
|
||||
... initial slab_alloc — this exercises the realloc
|
||||
... path (ngx_slab_free_locked of the old entries
|
||||
... buffer, copy into the new one) inside the log
|
||||
... handler.
|
||||
FOR ${i} IN RANGE 60
|
||||
Docker Exec Ignore Rc ${CLIENT} curl -s ${DATAPLANE_URL}/path${i}
|
||||
END
|
||||
Sleep 500ms
|
||||
Scrape Prometheus
|
||||
|
||||
*** Keywords ***
|
||||
|
||||
# --- Lab lifecycle ---
|
||||
|
||||
Deploy Lab
|
||||
Require ASan Build
|
||||
Run ${CLAB_BIN} --runtime ${runtime} destroy -t ${CURDIR}/${lab-file} --cleanup 2>&1 || true
|
||||
${rc} ${output} = Run And Return Rc And Output
|
||||
... ${CLAB_BIN} --runtime ${runtime} deploy -t ${CURDIR}/${lab-file}
|
||||
Log ${output}
|
||||
Should Be Equal As Integers ${rc} 0
|
||||
Wait Until Keyword Succeeds 90s 3s Server Is Ready
|
||||
Wait Until Keyword Succeeds 60s 3s Client Can Reach Server
|
||||
|
||||
Require ASan Build
|
||||
[Documentation] Fail fast with an actionable message if the user
|
||||
... forgot to run `make build-asan` before invoking
|
||||
... this suite.
|
||||
${rc} = Run And Return Rc test -x ${EXECDIR}/build/nginx-asan/sbin/nginx
|
||||
Run Keyword If ${rc} != 0
|
||||
... Fail ASan nginx not found — run `make build-asan` first.
|
||||
|
||||
Server Is Ready
|
||||
${rc} ${output} = Run And Return Rc And Output curl -sf ${SCRAPE_URL}
|
||||
Should Be Equal As Integers ${rc} 0
|
||||
|
||||
Client Can Reach Server
|
||||
${rc} ${output} = Run And Return Rc And Output
|
||||
... docker exec ${CLIENT} curl -sf ${DATAPLANE_URL}/
|
||||
Should Be Equal As Integers ${rc} 0
|
||||
|
||||
Cleanup Lab
|
||||
Run docker logs ${SERVER} > ${EXECDIR}/tests/out/asan-server-docker.log 2>&1
|
||||
Run docker exec ${SERVER} cat /tmp/nginx.err > ${EXECDIR}/tests/out/asan-nginx-err.log 2>&1
|
||||
Run docker exec ${SERVER} cat /tmp/nginx.stderr > ${EXECDIR}/tests/out/asan-nginx-stderr.log 2>&1
|
||||
Run docker exec ${SERVER} bash -c 'cat /tmp/asan.* 2>/dev/null; cat /tmp/ubsan.* 2>/dev/null' > ${EXECDIR}/tests/out/asan-reports.log 2>&1
|
||||
Run ${CLAB_BIN} --runtime ${runtime} destroy -t ${CURDIR}/${lab-file} --cleanup
|
||||
|
||||
# --- Sanitizer assertion ---
|
||||
|
||||
Assert No Sanitizer Findings
|
||||
[Documentation] Fail the current test if the ASan or UBSan
|
||||
... runtime wrote any findings to stderr or their
|
||||
... per-pid log files. Runs after every test case —
|
||||
... we want the failing test to be the one that
|
||||
... produced the finding, not a later one.
|
||||
${rc} ${hits} = Run And Return Rc And Output
|
||||
... docker exec ${SERVER} bash -c 'grep -E "AddressSanitizer|LeakSanitizer|runtime error|SUMMARY:" /tmp/nginx.stderr /tmp/asan.* /tmp/ubsan.* 2>/dev/null || true'
|
||||
Run Keyword If '${hits}' != '${EMPTY}'
|
||||
... Fail Sanitizer findings detected:\n${hits}
|
||||
|
||||
# --- Traffic generation ---
|
||||
|
||||
Generate Traffic
|
||||
[Arguments] ${count}
|
||||
FOR ${i} IN RANGE ${count}
|
||||
Docker Exec Ignore Rc ${CLIENT} curl -s ${DATAPLANE_URL}/
|
||||
END
|
||||
|
||||
# --- Scraping ---
|
||||
|
||||
Scrape Prometheus
|
||||
${rc} ${output} = Run And Return Rc And Output curl -sf ${SCRAPE_URL}
|
||||
Should Be Equal As Integers ${rc} 0
|
||||
RETURN ${output}
|
||||
|
||||
# --- Container helpers ---
|
||||
|
||||
Docker Exec
|
||||
[Arguments] ${container} ${cmd}
|
||||
${rc} ${output} = Run And Return Rc And Output
|
||||
... docker exec ${container} ${cmd}
|
||||
Should Be Equal As Integers ${rc} 0
|
||||
RETURN ${output}
|
||||
|
||||
Docker Exec Ignore Rc
|
||||
[Arguments] ${container} ${cmd}
|
||||
${rc} ${output} = Run And Return Rc And Output
|
||||
... docker exec ${container} ${cmd}
|
||||
RETURN ${output}
|
||||
23
tests/02-asan/lab/client/start.sh
Executable file
23
tests/02-asan/lab/client/start.sh
Executable file
@@ -0,0 +1,23 @@
|
||||
#!/bin/bash
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# Client container entrypoint for the ASan test suite. Identical in
|
||||
# spirit to tests/01-module/lab/client/start.sh — kept as a separate
|
||||
# file so this suite's lab can be torn down and redeployed without
|
||||
# affecting 01-module state.
|
||||
|
||||
set -e
|
||||
|
||||
apt-get update -qq
|
||||
apt-get install -y -qq curl iproute2 > /dev/null 2>&1
|
||||
|
||||
echo "Waiting for eth1 ..."
|
||||
while ! ip link show eth1 > /dev/null 2>&1; do
|
||||
sleep 0.2
|
||||
done
|
||||
ip link set eth1 up
|
||||
ip addr add ${MY_IP} dev eth1
|
||||
|
||||
# Drop the mgmt default route so data-plane traffic goes out eth1.
|
||||
ip route del default 2>/dev/null || true
|
||||
|
||||
exec sleep infinity
|
||||
46
tests/02-asan/lab/ipng-stats-asan.clab.yml
Normal file
46
tests/02-asan/lab/ipng-stats-asan.clab.yml
Normal file
@@ -0,0 +1,46 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# Containerlab topology for the AddressSanitizer/UBSan test suite.
|
||||
#
|
||||
# The server container bind-mounts build/nginx-asan/ — the
|
||||
# sanitizer-instrumented nginx built by `make build-asan`. The binary
|
||||
# was compiled against host glibc, so the container image must match
|
||||
# the host's Debian release (trixie/13) for the .so and libasan to be
|
||||
# ABI-compatible. The binary is run directly (no .deb install): the
|
||||
# `make pkg-deb` path is exercised by tests/01-module/.
|
||||
#
|
||||
# Topology: one server + one client with a single data-plane link.
|
||||
# Unlike 01-module we don't need multi-interface attribution here —
|
||||
# this suite is focused on memory correctness, not traffic tagging.
|
||||
|
||||
name: ipng-stats-asan
|
||||
|
||||
mgmt:
|
||||
network: ipng-stats-asan-net
|
||||
ipv4-subnet: 172.20.41.0/24
|
||||
|
||||
topology:
|
||||
nodes:
|
||||
server:
|
||||
kind: linux
|
||||
image: debian:trixie-slim
|
||||
mgmt-ipv4: 172.20.41.2
|
||||
binds:
|
||||
# RW because nginx chowns client_body_temp/ and writes to logs/
|
||||
# on master startup; it's a build artifact so we don't mind.
|
||||
- ../../../build/nginx-asan:/opt/nginx-asan
|
||||
- ./server/nginx.conf:/opt/nginx-asan/conf/nginx.conf:ro
|
||||
- ./server/start.sh:/start.sh:ro
|
||||
cmd: bash /start.sh
|
||||
|
||||
client:
|
||||
kind: linux
|
||||
image: debian:trixie-slim
|
||||
mgmt-ipv4: 172.20.41.11
|
||||
binds:
|
||||
- ./client/start.sh:/start.sh:ro
|
||||
cmd: bash /start.sh
|
||||
env:
|
||||
MY_IP: 10.0.1.2/24
|
||||
|
||||
links:
|
||||
- endpoints: ["server:eth1", "client:eth1"]
|
||||
50
tests/02-asan/lab/server/nginx.conf
Normal file
50
tests/02-asan/lab/server/nginx.conf
Normal file
@@ -0,0 +1,50 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# Minimal nginx config for the ASan test suite. Exercises the code paths
|
||||
# most likely to surface memory errors: shared-zone init/reload, the
|
||||
# scrape renderer (under slab mutex), the log-phase handler's interning,
|
||||
# and logtail UDP buffering.
|
||||
|
||||
load_module /opt/nginx-asan/modules/ngx_http_ipng_stats_module.so;
|
||||
|
||||
daemon off;
|
||||
master_process on;
|
||||
worker_processes 2;
|
||||
pid /tmp/nginx.pid;
|
||||
error_log /tmp/nginx.err info;
|
||||
|
||||
events {
|
||||
worker_connections 128;
|
||||
}
|
||||
|
||||
http {
|
||||
access_log off;
|
||||
ipng_stats_zone ipng:1m;
|
||||
ipng_stats_flush_interval 300ms;
|
||||
ipng_stats_default_source direct;
|
||||
|
||||
log_format logtail '$remote_addr\t$request_method\t$request_uri\t$status';
|
||||
ipng_stats_logtail logtail udp://127.0.0.1:9514 buffer=4k flush=300ms;
|
||||
|
||||
server {
|
||||
# Mgmt scrape endpoint.
|
||||
listen 172.20.41.2:9113;
|
||||
|
||||
location = /stats {
|
||||
ipng_stats;
|
||||
allow all;
|
||||
}
|
||||
}
|
||||
|
||||
server {
|
||||
# Data plane — client traffic lands here.
|
||||
listen 10.0.1.1:8080 device=eth1 ipng_source_tag=cl1;
|
||||
listen 172.20.41.2:8080;
|
||||
|
||||
location / {
|
||||
return 200 "ok $server_addr\n";
|
||||
}
|
||||
location /notfound {
|
||||
return 404 "nope\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
61
tests/02-asan/lab/server/start.sh
Executable file
61
tests/02-asan/lab/server/start.sh
Executable file
@@ -0,0 +1,61 @@
|
||||
#!/bin/bash
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# Server container entrypoint for the ASan test suite. Installs libasan
|
||||
# runtime (the sanitizer-instrumented binary was linked against host
|
||||
# gcc's libasan.so.8), wires up the data-plane interface, and execs the
|
||||
# ASan nginx in the foreground with stderr captured so the Robot suite
|
||||
# can grep for AddressSanitizer/UBSan findings at teardown.
|
||||
|
||||
set -e
|
||||
|
||||
apt-get update -qq
|
||||
apt-get install -y -qq libasan8 libubsan1 ncat iproute2 curl > /dev/null 2>&1
|
||||
|
||||
# Wait for containerlab to attach the data-plane veth, configure the IP.
|
||||
echo "Waiting for eth1 ..."
|
||||
while ! ip link show eth1 > /dev/null 2>&1; do
|
||||
sleep 0.2
|
||||
done
|
||||
ip link set eth1 up
|
||||
ip addr add 10.0.1.1/24 dev eth1
|
||||
|
||||
# UDP logtail listener — drains the module's datagrams so sendto() has
|
||||
# a real destination. The test doesn't assert on this file's contents
|
||||
# (01-module already covers logtail semantics); we just need the socket
|
||||
# to exist so ASan sees a complete write/flush cycle in the module.
|
||||
mkdir -p /var/log/nginx
|
||||
ncat -u -l -k 127.0.0.1 9514 --recv-only >> /var/log/nginx/logtail-udp.log &
|
||||
|
||||
# ASan options:
|
||||
# detect_odr_violation=0 — nginx intentionally duplicates symbols like
|
||||
# ngx_module_names between the main binary and each dynamic module.
|
||||
# abort_on_error=1, halt_on_error=1 — fail fast so the Robot suite
|
||||
# sees the exit status and the ASan report is preserved at the tail
|
||||
# of /tmp/nginx.stderr.
|
||||
# detect_leaks=0 — nginx exits without running its pool destructors in
|
||||
# many paths; leak detection is not the goal here.
|
||||
# log_path — ASan writes each finding to this prefix + pid, so even
|
||||
# when nginx wipes its own error log on reload the ASan traces
|
||||
# survive for post-run inspection.
|
||||
ASAN_OPTS="detect_odr_violation=0:abort_on_error=1:halt_on_error=1:detect_leaks=0:log_path=/tmp/asan"
|
||||
UBSAN_OPTS="print_stacktrace=1:halt_on_error=0:log_path=/tmp/ubsan"
|
||||
|
||||
# Wrapper so every subsequent `docker exec ... ngxasan ...` (e.g. the
|
||||
# reload signal from the Robot suite) inherits the same sanitizer
|
||||
# settings. `docker exec` does not carry the master's env.
|
||||
cat > /usr/local/bin/ngxasan <<EOF
|
||||
#!/bin/bash
|
||||
export ASAN_OPTIONS="${ASAN_OPTS}"
|
||||
export UBSAN_OPTIONS="${UBSAN_OPTS}"
|
||||
exec /opt/nginx-asan/sbin/nginx -p /opt/nginx-asan -c /opt/nginx-asan/conf/nginx.conf "\$@"
|
||||
EOF
|
||||
chmod +x /usr/local/bin/ngxasan
|
||||
|
||||
export ASAN_OPTIONS="${ASAN_OPTS}"
|
||||
export UBSAN_OPTIONS="${UBSAN_OPTS}"
|
||||
|
||||
# Tee stderr so both docker logs and /tmp/nginx.stderr see it. The
|
||||
# Robot suite inspects the file; ASan writes its report to stderr
|
||||
# before abort_on_error kicks the process.
|
||||
exec /opt/nginx-asan/sbin/nginx -p /opt/nginx-asan -c /opt/nginx-asan/conf/nginx.conf \
|
||||
2> >(tee /tmp/nginx.stderr >&2)
|
||||
Reference in New Issue
Block a user